Kumar committed
Commit · 0077a91
Parent(s): 628a299

first commit
Browse files
- app.py +44 -0
- config.py +5 -0
- model/__pycache__/load_model.cpython-312.pyc +0 -0
- model/load_model.py +20 -0
- predictor/__pycache__/predict.cpython-312.pyc +0 -0
- predictor/predict.py +18 -0
- requirements.txt +10 -0
- utils/__pycache__/face_utils.cpython-312.pyc +0 -0
- utils/__pycache__/gradcam.cpython-312.pyc +0 -0
- utils/__pycache__/video_utils.cpython-312.pyc +0 -0
- utils/face_utils.py +20 -0
- utils/gradcam.py +42 -0
- utils/video_utils.py +16 -0
app.py
ADDED
@@ -0,0 +1,44 @@
import gradio as gr
from model.load_model import load_model
from utils.video_utils import extract_frames
from utils.face_utils import extract_faces
from predictor.predict import predict_faces
from utils.gradcam import get_gradcam, get_conv_layers
import numpy as np
from PIL import Image
from tqdm import tqdm

model = load_model()
conv_layer_names = get_conv_layers(model)  # populate dropdown choices

def deepfake_app(video, selected_layer, progress=gr.Progress(track_tqdm=True)):
    frames = extract_frames(video)
    frames = list(frames)

    faces = extract_faces(frames)
    faces = list(progress.tqdm(faces, desc="Detecting faces"))

    if not faces:
        return "No face detected", None

    predictions = predict_faces(model, faces)
    predictions = list(progress.tqdm(predictions, desc="Running predictions"))

    avg_score = np.mean(predictions)
    label = "FAKE" if avg_score > 0.5 else "REAL"

    max_idx = np.argmax(predictions)
    cam_image = get_gradcam(model, faces[max_idx], selected_layer)
    cam_image = Image.fromarray(cam_image)

    return label, cam_image

gr.Interface(
    fn=deepfake_app,
    inputs=[gr.Video(label="Upload a Video"),
            gr.Dropdown(choices=conv_layer_names, label="Grad-CAM Layer", value=conv_layer_names[-1])
           ],
    outputs=["text", "image"],
    title="Deepfake Detection with XceptionNet",
    description="Upload a video, and the model will predict if it contains a deepfake, with Grad-CAM explainability for Responsible AI."
).launch()
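A note on the decision rule in app.py: the label comes from thresholding the mean of the per-face sigmoid scores, so one strongly fake-looking face can be outvoted by many low-scoring ones, while the Grad-CAM overlay is always drawn for the single highest-scoring face. A minimal illustration with hypothetical scores (not actual model output):

import numpy as np

scores = [0.2, 0.9, 0.8]                   # hypothetical per-face sigmoid outputs
avg = np.mean(scores)                      # 0.633
label = "FAKE" if avg > 0.5 else "REAL"    # -> "FAKE"
idx = int(np.argmax(scores))               # -> 1, the face used for the Grad-CAM overlay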
config.py
ADDED
@@ -0,0 +1,5 @@
import torch

MODEL_PATH = "xception_best_model.pth"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
FRAME_SKIP = 5  # keep every 5th frame
model/__pycache__/load_model.cpython-312.pyc
ADDED
Binary file (1.41 kB)
model/load_model.py
ADDED
@@ -0,0 +1,20 @@
import torch
import torch.nn as nn
import timm
from config import MODEL_PATH, DEVICE

def load_model():
    model = timm.create_model('xception', pretrained=False)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 100),
        nn.ReLU(),
        nn.Dropout(0.7),
        nn.Linear(100, 50),
        nn.ReLU(),
        nn.Dropout(0.7),
        nn.Linear(50, 1),
        nn.Sigmoid()
    )
    model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
    model.to(DEVICE).eval()
    return model
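A quick smoke test for the loader, sketched under the assumption that xception_best_model.pth sits next to the code; it only checks that a dummy 299x299 batch yields a single sigmoid score in [0, 1]:

import torch
from model.load_model import load_model
from config import DEVICE

model = load_model()
dummy = torch.randn(1, 3, 299, 299).to(DEVICE)  # Xception's expected input size
with torch.no_grad():
    score = model(dummy).item()
print(f"sigmoid output: {score:.4f}")
assert 0.0 <= score <= 1.0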
predictor/__pycache__/predict.cpython-312.pyc
ADDED
Binary file (1.22 kB)
predictor/predict.py
ADDED
@@ -0,0 +1,18 @@
import torch
from torchvision import transforms
from config import DEVICE

def predict_faces(model, faces):
    transform = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    predictions = []
    for face in faces:
        face = transform(face).unsqueeze(0).to(DEVICE)
        with torch.no_grad():
            pred = model(face).item()
        predictions.append(pred)
    return predictions
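predict_faces runs one forward pass per face. A batched alternative is sketched below; it is not part of this commit (predict_faces_batched and batch_size are illustrative names), but stacking the transformed crops amortizes the per-call overhead on GPU:

import torch
from torchvision import transforms
from config import DEVICE

def predict_faces_batched(model, faces, batch_size=32):
    transform = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    predictions = []
    for start in range(0, len(faces), batch_size):
        # Stack one batch of transformed crops and run a single forward pass
        batch = torch.stack([transform(f) for f in faces[start:start + batch_size]]).to(DEVICE)
        with torch.no_grad():
            preds = model(batch).squeeze(1)  # (B, 1) -> (B,)
        predictions.extend(preds.cpu().tolist())
    return predictions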
requirements.txt
ADDED
@@ -0,0 +1,10 @@
torch
torchvision
timm
opencv-python
pillow
facenet-pytorch
gradio
tqdm
pytorch-grad-cam
numpy
utils/__pycache__/face_utils.cpython-312.pyc
ADDED
Binary file (1.43 kB)
utils/__pycache__/gradcam.cpython-312.pyc
ADDED
Binary file (2.39 kB)
utils/__pycache__/video_utils.cpython-312.pyc
ADDED
Binary file (854 Bytes)
utils/face_utils.py
ADDED
@@ -0,0 +1,20 @@
from facenet_pytorch import MTCNN
from PIL import Image
import torch
import gradio as gr

mtcnn = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))

def extract_faces(frames, progress=gr.Progress(track_tqdm=True)):
    face_crops = []
    for frame in progress.tqdm(frames, desc='Detecting faces'):
        img = Image.fromarray(frame)

        boxes, _ = mtcnn.detect(img)

        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = [int(b) for b in box]
                face = img.crop((x1, y1, x2, y2))
                face_crops.append(face)
    return face_crops
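mtcnn.detect also returns a confidence per box, which extract_faces discards. A small variant, sketched as an option rather than part of the commit (extract_faces_confident and min_prob are illustrative names), keeps only high-confidence detections; it reuses the module's mtcnn and Image:

def extract_faces_confident(frames, min_prob=0.98):
    face_crops = []
    for frame in frames:
        img = Image.fromarray(frame)
        boxes, probs = mtcnn.detect(img)  # probs: one confidence score per box
        if boxes is None:
            continue
        for box, prob in zip(boxes, probs):
            if prob is not None and prob >= min_prob:
                x1, y1, x2, y2 = [int(b) for b in box]
                face_crops.append(img.crop((x1, y1, x2, y2)))
    return face_crops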
utils/gradcam.py
ADDED
@@ -0,0 +1,42 @@
import torch
import numpy as np
import torchvision.transforms as transforms
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image

def get_conv_layers(model):
    conv_layers = []
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            conv_layers.append(name)
    return conv_layers


def get_gradcam(model, face_pil_image, layer_name):
    model.eval()

    # Preprocess: convert PIL Image to normalized tensor
    transform = transforms.Compose([
        transforms.Resize((299, 299)),  # Xception input size
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # match training normalization
    ])
    face_tensor = transform(face_pil_image).unsqueeze(0).to(next(model.parameters()).device)

    # Convert image to numpy for overlay
    face_np = np.array(face_pil_image.resize((299, 299))) / 255.0  # shape: (H, W, C)
    if face_np.shape[-1] == 1:
        face_np = np.repeat(face_np, 3, axis=-1)

    # Grad-CAM
    # Dynamically get the target layer by name
    target_module = dict(model.named_modules())[layer_name]
    cam = GradCAM(model=model, target_layers=[target_module])
    grayscale_cam = cam(input_tensor=face_tensor, targets=[ClassifierOutputTarget(0)])

    # Overlay the CAM on the image
    grayscale_cam = grayscale_cam[0]  # only one image in the batch
    visualization = show_cam_on_image(face_np, grayscale_cam, use_rgb=True)

    return visualization
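A usage sketch tying the two helpers together, assuming a face crop saved locally as face.png (an illustrative path); the last conv layer is usually the most informative default, which is why app.py preselects conv_layer_names[-1]:

from PIL import Image
from model.load_model import load_model
from utils.gradcam import get_conv_layers, get_gradcam

model = load_model()
layers = get_conv_layers(model)                 # Conv2d module names, in forward order
face = Image.open("face.png").convert("RGB")
heatmap = get_gradcam(model, face, layers[-1])  # RGB numpy array with the CAM overlay
Image.fromarray(heatmap).save("face_cam.png")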
utils/video_utils.py
ADDED
@@ -0,0 +1,16 @@
import cv2
from config import FRAME_SKIP

def extract_frames(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    i = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if i % FRAME_SKIP == 0:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        i += 1
    cap.release()
    return frames
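extract_frames keeps every sampled frame in memory, which can get heavy for long clips. A lazy variant is sketched below as an option, not part of the commit (iter_frames is an illustrative name); app.py already wraps the result in list(), so a generator would slot in:

import cv2
from config import FRAME_SKIP

def iter_frames(video_path):
    cap = cv2.VideoCapture(video_path)
    i = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if i % FRAME_SKIP == 0:
                yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # BGR -> RGB, like the original
            i += 1
    finally:
        cap.release()  # released even if the consumer stops early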