Spaces:

hiyata
/

HostClassifier

Running

App Files Files Community

hiyata committed on Jan 12

Commit

9a00943

verified ·

1 Parent(s): 552aec4

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -29

app.py CHANGED Viewed

@@ -148,20 +148,17 @@ def find_extreme_subregion(shap_means, window_size=500, mode="max"):
         avg_val = np.mean(shap_means) if n > 0 else 0.0
         return (0, n, avg_val)
-    # Rolling sum approach
-    csum = np.cumsum(shap_means)  # csum[i] = sum of shap_means[0..i-1]
-    # function to compute sum in [start, start+window_size)
     def window_sum(start):
         end = start + window_size
         return csum[end] - csum[start]
     best_start = 0
-    best_avg = None
     # Initialize the best with the first window
     best_sum = window_sum(0)
     best_avg = best_sum / window_size
-    best_start = 0
     for start in range(1, n - window_size + 1):
         wsum = window_sum(start)
@@ -195,7 +192,10 @@ def plot_linear_heatmap(shap_means, title="Per-base SHAP Heatmap", start=None, e
     Plots a 1D heatmap of per-base SHAP contributions.
     Negative = push toward Non-Human, Positive = push toward Human.
     Optionally can show only a subrange (start:end).
-    We'll adjust layout so that the colorbar is below the x-axis and doesn't overlap.
     """
     if start is not None and end is not None:
         shap_means = shap_means[start:end]
@@ -208,16 +208,16 @@ def plot_linear_heatmap(shap_means, title="Per-base SHAP Heatmap", start=None, e
     fig, ax = plt.subplots(figsize=(12, 2))
     cax = ax.imshow(heatmap_data, aspect='auto', cmap='RdBu_r')
-    # Adjust colorbar with some extra margin
-    # We'll place the colorbar horizontally below
-    cbar = plt.colorbar(cax, orientation='horizontal', pad=0.25)
     cbar.set_label('SHAP Contribution')
     ax.set_yticks([])
     ax.set_xlabel('Position in Sequence')
     ax.set_title(f"{title}{subtitle}")
-    # Additional spacing at bottom to avoid overlap
-    plt.subplots_adjust(bottom=0.3)
     return fig
@@ -280,14 +280,14 @@ def analyze_sequence(file_obj, top_kmers=10, fasta_text="", window_size=500):
             with open(file_obj, 'r') as f:
                 text = f.read()
         except Exception as e:
-            return (f"Error reading file: {str(e)}", None, None, None, None, None)
     else:
-        return ("Please provide a FASTA sequence.", None, None, None, None, None)
     # Parse FASTA
     sequences = parse_fasta(text)
     if not sequences:
-        return ("No valid FASTA sequences found.", None, None, None, None, None)
     header, seq = sequences[0]
@@ -298,7 +298,7 @@ def analyze_sequence(file_obj, top_kmers=10, fasta_text="", window_size=500):
         model.load_state_dict(torch.load('model.pt', map_location=device))
         scaler = joblib.load('scaler.pkl')
     except Exception as e:
-        return (f"Error loading model: {str(e)}", None, None, None, None, None)
     # Vectorize + scale
     freq_vector = sequence_to_kmer_vector(seq)
@@ -343,20 +343,14 @@ def analyze_sequence(file_obj, top_kmers=10, fasta_text="", window_size=500):
     heatmap_fig = plot_linear_heatmap(shap_means, title="Genome-wide SHAP")
     heatmap_img = fig_to_image(heatmap_fig)
-    # Return:
-    #   1) results text
-    #   2) k-mer bar image
-    #   3) full-genome heatmap
-    #   4) "state" with { seq, shap_means, header }, for subregion analysis
-    #   5) we also return "most pushing" subregion info if we want
-    #      but for simplicity, we can just keep them in the text.
-    #   6) the sequence header
     state_dict = {
         "seq": seq,
         "shap_means": shap_means
     }
-    return (results_text, bar_img, heatmap_img, state_dict, header, None)
 ###############################################################################
 # 8. SUBREGION ANALYSIS (Gradio Step 2)
@@ -481,21 +475,20 @@ with gr.Blocks(css=css) as iface:
                 kmer_img = gr.Image(label="Top k-mer SHAP")
                 genome_img = gr.Image(label="Genome-wide SHAP Heatmap")
-        # Hidden states that store data for step 2
         seq_state = gr.State()
         header_state = gr.State()
-        # The "analyze_sequence" function returns 6 values, which we map here:
         # 1) results_text
         # 2) bar_img
         # 3) heatmap_img
         # 4) state_dict
         # 5) header
-        # 6) None placeholder
         analyze_btn.click(
             analyze_sequence,
             inputs=[file_input, top_k, text_input, win_size],
-            outputs=[results_box, kmer_img, genome_img, seq_state, header_state, None]
         )
     with gr.Tab("2) Subregion Exploration"):

         avg_val = np.mean(shap_means) if n > 0 else 0.0
         return (0, n, avg_val)
+    # For efficiency, we can do a rolling sum approach
+    csum = np.cumsum(shap_means)
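+    # NOTE: np.cumsum yields csum[i] = sum of shap_means[0..i] (inclusive);
+    # the csum[end] - csum[start] lookup below assumes a leading 0, i.e. csum[i] = sum of shap_means[0..i-1].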
     def window_sum(start):
         end = start + window_size
         return csum[end] - csum[start]
     best_start = 0
     # Initialize the best with the first window
     best_sum = window_sum(0)
     best_avg = best_sum / window_size
     for start in range(1, n - window_size + 1):
         wsum = window_sum(start)
     Plots a 1D heatmap of per-base SHAP contributions.
     Negative = push toward Non-Human, Positive = push toward Human.
     Optionally can show only a subrange (start:end).
+    We adjust layout so the colorbar is well below the x-axis:
+      - orientation='horizontal', pad=0.35
+      - plt.subplots_adjust(bottom=0.4)
     """
     if start is not None and end is not None:
         shap_means = shap_means[start:end]
     fig, ax = plt.subplots(figsize=(12, 2))
     cax = ax.imshow(heatmap_data, aspect='auto', cmap='RdBu_r')
+    # Place colorbar below and add extra margin
+    cbar = plt.colorbar(cax, orientation='horizontal', pad=0.35)
     cbar.set_label('SHAP Contribution')
     ax.set_yticks([])
     ax.set_xlabel('Position in Sequence')
     ax.set_title(f"{title}{subtitle}")
+    # Extra bottom margin so colorbar won't overlap x-axis labels
+    plt.subplots_adjust(bottom=0.4)
     return fig
             with open(file_obj, 'r') as f:
                 text = f.read()
         except Exception as e:
+            return (f"Error reading file: {str(e)}", None, None, None, None)
     else:
+        return ("Please provide a FASTA sequence.", None, None, None, None)
     # Parse FASTA
     sequences = parse_fasta(text)
     if not sequences:
+        return ("No valid FASTA sequences found.", None, None, None, None)
     header, seq = sequences[0]
         model.load_state_dict(torch.load('model.pt', map_location=device))
         scaler = joblib.load('scaler.pkl')
     except Exception as e:
+        return (f"Error loading model: {str(e)}", None, None, None, None)
     # Vectorize + scale
     freq_vector = sequence_to_kmer_vector(seq)
     heatmap_fig = plot_linear_heatmap(shap_means, title="Genome-wide SHAP")
     heatmap_img = fig_to_image(heatmap_fig)
+    # Store data for subregion analysis
     state_dict = {
         "seq": seq,
         "shap_means": shap_means
     }
+    # We now return 5 items (not 6):
+    return (results_text, bar_img, heatmap_img, state_dict, header)
 ###############################################################################
 # 8. SUBREGION ANALYSIS (Gradio Step 2)
                 kmer_img = gr.Image(label="Top k-mer SHAP")
                 genome_img = gr.Image(label="Genome-wide SHAP Heatmap")
+        # State for step 2
         seq_state = gr.State()
         header_state = gr.State()
+        # analyze_sequence(...) now returns 5 items, so we have 5 outputs.
         # 1) results_text
         # 2) bar_img
         # 3) heatmap_img
         # 4) state_dict
         # 5) header
         analyze_btn.click(
             analyze_sequence,
             inputs=[file_input, top_k, text_input, win_size],
+            outputs=[results_box, kmer_img, genome_img, seq_state, header_state]
         )
     with gr.Tab("2) Subregion Exploration"):