Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -85,24 +85,62 @@ def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
|
|
85 |
# 3. SHAP-VALUE (ABLATION) CALCULATION
|
86 |
###############################################################################
|
87 |
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
model.eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
with torch.no_grad():
|
92 |
-
baseline_output = model(
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
x_zeroed = x_tensor.clone()
|
97 |
-
for i in range(x_tensor.shape[1]):
|
98 |
-
original_val = x_zeroed[0, i].item()
|
99 |
-
x_zeroed[0, i] = 0.0
|
100 |
-
output = model(x_zeroed)
|
101 |
-
probs = torch.softmax(output, dim=1)
|
102 |
-
prob = probs[0, 1].item()
|
103 |
-
shap_values.append(baseline_prob - prob)
|
104 |
-
x_zeroed[0, i] = original_val
|
105 |
-
return np.array(shap_values), baseline_prob
|
106 |
|
107 |
|
108 |
###############################################################################
|
|
|
85 |
# 3. SHAP-VALUE (ABLATION) CALCULATION
|
86 |
###############################################################################
|
87 |
|
88 |
+
def calculate_shap_values(model, x_tensor, baseline=None, steps=50):
    """
    Calculate feature attributions using Integrated Gradients.

    NOTE: the name is kept for backward compatibility with callers, but this
    computes Integrated Gradients attributions, not Shapley values.

    Args:
        model: A PyTorch classification model returning logits of shape
            (batch, num_classes); class index 1 is treated as the target
            ("human") class.
        x_tensor: Input tensor of shape (1, num_features).
        baseline: Tensor of the same shape as x_tensor to use as the reference.
            If None, defaults to a tensor of zeros.
        steps: Number of steps in the trapezoidal approximation of the path
            integral. Must be >= 1.

    Returns:
        attributions: A numpy array of shape (num_features,) with feature
            attributions.
        baseline_prob: The model's predicted probability for the target class
            when fed the baseline input.

    Raises:
        ValueError: If steps < 1.
    """
    if steps < 1:
        # Guard: steps == 0 would otherwise surface as a cryptic ZeroDivisionError.
        raise ValueError(f"steps must be >= 1, got {steps}")

    model.eval()
    if baseline is None:
        baseline = torch.zeros_like(x_tensor)

    # Interpolate along the straight-line path from the baseline to the input.
    scaled_inputs = torch.cat(
        [baseline + (i / steps) * (x_tensor - baseline) for i in range(steps + 1)],
        dim=0,
    )  # shape: (steps+1, num_features)
    scaled_inputs.requires_grad_(True)

    # Forward pass: evaluate the model at every interpolation point at once.
    outputs = model(scaled_inputs)  # shape: (steps+1, num_classes)
    probs = torch.softmax(outputs, dim=1)[:, 1]  # target-class probability

    # Backward pass: d(prob)/d(input) at every interpolation point.
    grads = torch.autograd.grad(
        outputs=probs,
        inputs=scaled_inputs,
        grad_outputs=torch.ones_like(probs),
        create_graph=False,
        retain_graph=False,
    )[0]  # shape: (steps+1, num_features)

    # Trapezoidal rule: average consecutive gradients, then average over steps.
    avg_grads = (grads[:-1] + grads[1:]) / 2.0
    integrated_grad = avg_grads.mean(dim=0, keepdim=True)  # (1, num_features)

    # IG attribution = (input - baseline) * average gradient along the path.
    attributions = (x_tensor - baseline) * integrated_grad  # (1, num_features)

    # Reference probability at the baseline input (no gradients needed).
    with torch.no_grad():
        baseline_output = model(baseline)
        baseline_prob = torch.softmax(baseline_output, dim=1)[0, 1].item()

    # detach() guards against a caller passing x_tensor with requires_grad=True
    # (numpy() would raise on a graph-attached tensor); squeeze(0) — rather
    # than squeeze() — keeps the result 1-D even when num_features == 1.
    return attributions.detach().squeeze(0).cpu().numpy(), baseline_prob
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
|
145 |
|
146 |
###############################################################################
|