danielkovtun committed on
Commit
7d9b175
·
1 Parent(s): 722b2f2

feat: add example evaluate endpoint to fetch metrics for a single prediction given model and system ID

Browse files
Files changed (1) hide show
  1. inference_app.py +141 -70
inference_app.py CHANGED
@@ -3,15 +3,20 @@ import time
3
  from pathlib import Path
4
 
5
  import numpy as np
 
6
  from biotite.structure.atoms import AtomArrayStack
7
  from scipy.spatial.transform import Rotation as R
8
- from pinder.core.structure.atoms import atom_array_from_pdb_file, normalize_orientation, write_pdb
 
9
  from pinder.core.structure.contacts import get_stack_contacts
 
 
10
 
11
  import gradio as gr
12
 
13
  from gradio_molecule3d import Molecule3D
14
 
 
15
 
16
  def predict(
17
  receptor_pdb: Path,
@@ -22,10 +27,10 @@ def predict(
22
  start_time = time.time()
23
  # Do inference here
24
  # return an output pdb file with the protein and two chains R and L.
25
- receptor = atom_array_from_pdb_file(receptor_pdb, extra_fields=["b_factor"])
26
- ligand = atom_array_from_pdb_file(ligand_pdb, extra_fields=["b_factor"])
27
- receptor = normalize_orientation(receptor)
28
- ligand = normalize_orientation(ligand)
29
 
30
  # Number of random poses to generate
31
  M = 50
@@ -69,79 +74,145 @@ def predict(
69
  # System ID
70
  pdb_name = Path(receptor_pdb).stem + "--" + Path(ligand_pdb).name
71
  output_pdb = output_dir / pdb_name
72
- write_pdb(best_pose, output_pdb)
73
  end_time = time.time()
74
  run_time = end_time - start_time
75
  return str(output_pdb), run_time
76
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  with gr.Blocks() as app:
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- gr.Markdown("# Template for inference")
 
 
 
81
 
82
- gr.Markdown("Title, description, and other information about the model")
83
- with gr.Row():
84
- with gr.Column():
85
- input_protein_1 = gr.File(label="Input Protein 1 monomer (PDB)")
86
- input_fasta_1 = gr.File(label="Input Protein 1 monomer sequence (FASTA)")
87
- with gr.Column():
88
- input_protein_2 = gr.File(label="Input Protein 2 monomer (PDB)")
89
- input_fasta_2 = gr.File(label="Input Protein 2 monomer sequence (FASTA)")
90
-
91
-
92
-
93
- # define any options here
94
-
95
- # for automated inference the default options are used
96
- # slider_option = gr.Slider(0,10, label="Slider Option")
97
- # checkbox_option = gr.Checkbox(label="Checkbox Option")
98
- # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")
99
-
100
- btn = gr.Button("Run Inference")
101
-
102
- gr.Examples(
103
- [
104
- [
105
- "8i5w_R.pdb",
106
- "8i5w_R.fasta",
107
- "8i5w_L.pdb",
108
- "8i5w_L.fasta",
109
- ],
110
- ],
111
- [input_protein_1, input_fasta_1, input_protein_2, input_fasta_2],
112
- )
113
- reps = [
114
- {
115
- "model": 0,
116
- "style": "cartoon",
117
- "chain": "R",
118
- "color": "whiteCarbon",
119
- },
120
- {
121
- "model": 0,
122
- "style": "cartoon",
123
- "chain": "L",
124
- "color": "greenCarbon",
125
- },
126
- {
127
- "model": 0,
128
- "chain": "R",
129
- "style": "stick",
130
- "sidechain": True,
131
- "color": "whiteCarbon",
132
- },
133
- {
134
- "model": 0,
135
- "chain": "L",
136
- "style": "stick",
137
- "sidechain": True,
138
- "color": "greenCarbon"
139
- }
140
- ]
141
-
142
- out = Molecule3D(reps=reps)
143
- run_time = gr.Textbox(label="Runtime")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- btn.click(predict, inputs=[input_protein_1, input_protein_2, input_fasta_1, input_fasta_2], outputs=[out, run_time])
146
 
147
  app.launch()
 
3
  from pathlib import Path
4
 
5
  import numpy as np
6
+ import pandas as pd
7
  from biotite.structure.atoms import AtomArrayStack
8
  from scipy.spatial.transform import Rotation as R
9
+ from pinder.core import PinderSystem
10
+ from pinder.core.structure import atoms
11
  from pinder.core.structure.contacts import get_stack_contacts
12
+ from pinder.core.loader.structure import Structure
13
+ from pinder.eval.dockq import BiotiteDockQ
14
 
15
  import gradio as gr
16
 
17
  from gradio_molecule3d import Molecule3D
18
 
19
+ EVAL_METRICS = ["system", "L_rms", "I_rms", "F_nat", "DOCKQ", "CAPRI_class"]
20
 
21
  def predict(
22
  receptor_pdb: Path,
 
27
  start_time = time.time()
28
  # Do inference here
29
  # return an output pdb file with the protein and two chains R and L.
30
+ receptor = atoms.atom_array_from_pdb_file(receptor_pdb, extra_fields=["b_factor"])
31
+ ligand = atoms.atom_array_from_pdb_file(ligand_pdb, extra_fields=["b_factor"])
32
+ receptor = atoms.normalize_orientation(receptor)
33
+ ligand = atoms.normalize_orientation(ligand)
34
 
35
  # Number of random poses to generate
36
  M = 50
 
74
  # System ID
75
  pdb_name = Path(receptor_pdb).stem + "--" + Path(ligand_pdb).name
76
  output_pdb = output_dir / pdb_name
77
+ atoms.write_pdb(best_pose, output_pdb)
78
  end_time = time.time()
79
  run_time = end_time - start_time
80
  return str(output_pdb), run_time
81
 
82
 
83
def evaluate(
    system_id: str,
    prediction_pdb: Path,
) -> tuple[pd.DataFrame, list[str], float]:
    """Score a single predicted complex against the native of a PINDER system.

    Args:
        system_id: PINDER system identifier used to look up the native
            structure via ``PinderSystem``.
        prediction_pdb: Path to the predicted complex PDB file. NOTE: this
            file is overwritten in place with the superimposed prediction
            before returning.

    Returns:
        A 3-tuple ``(metrics, [prediction_path, native_path], run_time)``:
        the metrics table with columns ``system, L_rms, I_rms, F_nat, DOCKQ,
        CAPRI_class``; the two structure paths as strings (prediction first,
        native second); and the wall-clock seconds spent computing metrics.

    Raises:
        ValueError: If ``system_id`` is empty or ``prediction_pdb`` is None
            (e.g. the UI form was submitted without filling in the inputs).
    """
    # Fail early with a readable message instead of an opaque TypeError from
    # Path(None) or a lookup failure deep inside the PINDER loader.
    if not system_id:
        raise ValueError("A PINDER system ID is required for evaluation.")
    if prediction_pdb is None:
        raise ValueError("A prediction PDB file is required for evaluation.")
    prediction_path = Path(prediction_pdb)

    start_time = time.time()
    system = PinderSystem(system_id)
    native = system.native.filepath
    bdq = BiotiteDockQ(native, prediction_path, parallel_io=False)
    metrics = bdq.calculate()
    # Keep only the reported columns and expose them under the names used by
    # the metrics table; rename() returns a new frame, so the result of
    # calculate() is left untouched.
    metrics = metrics[["system", "LRMS", "iRMS", "Fnat", "DockQ", "CAPRI"]].rename(
        columns={
            "LRMS": "L_rms",
            "iRMS": "I_rms",
            "Fnat": "F_nat",
            "DockQ": "DOCKQ",
            "CAPRI": "CAPRI_class",
        }
    )
    # The reported runtime covers metric calculation only; the superposition
    # and file rewrite below are deliberately excluded, as before.
    run_time = time.time() - start_time

    # Superimpose the prediction onto the native and write it back to the
    # same path (presumably so both structures overlay in the viewer).
    pred = Structure(prediction_path)
    nat = Structure(Path(native))
    pred, _, _ = pred.superimpose(nat)
    pred.to_pdb(prediction_path)
    return metrics, [str(prediction_pdb), str(native)], run_time
101
+
102
+
103
  with gr.Blocks() as app:
104
+ with gr.Tab("🧬 PINDER inference template"):
105
+ gr.Markdown("Title, description, and other information about the model")
106
+ with gr.Row():
107
+ with gr.Column():
108
+ input_protein_1 = gr.File(label="Input Protein 1 monomer (PDB)")
109
+ input_fasta_1 = gr.File(label="Input Protein 1 monomer sequence (FASTA)")
110
+ with gr.Column():
111
+ input_protein_2 = gr.File(label="Input Protein 2 monomer (PDB)")
112
+ input_fasta_2 = gr.File(label="Input Protein 2 monomer sequence (FASTA)")
113
+
114
+
115
+
116
+ # define any options here
117
 
118
+ # for automated inference the default options are used
119
+ # slider_option = gr.Slider(0,10, label="Slider Option")
120
+ # checkbox_option = gr.Checkbox(label="Checkbox Option")
121
+ # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")
122
 
123
+ btn = gr.Button("Run Inference")
124
+
125
+ gr.Examples(
126
+ [
127
+ [
128
+ "8i5w_R.pdb",
129
+ "8i5w_R.fasta",
130
+ "8i5w_L.pdb",
131
+ "8i5w_L.fasta",
132
+ ],
133
+ ],
134
+ [input_protein_1, input_fasta_1, input_protein_2, input_fasta_2],
135
+ )
136
+ reps = [
137
+ {
138
+ "model": 0,
139
+ "style": "cartoon",
140
+ "chain": "R",
141
+ "color": "whiteCarbon",
142
+ },
143
+ {
144
+ "model": 0,
145
+ "style": "cartoon",
146
+ "chain": "L",
147
+ "color": "greenCarbon",
148
+ },
149
+ {
150
+ "model": 0,
151
+ "chain": "R",
152
+ "style": "stick",
153
+ "sidechain": True,
154
+ "color": "whiteCarbon",
155
+ },
156
+ {
157
+ "model": 0,
158
+ "chain": "L",
159
+ "style": "stick",
160
+ "sidechain": True,
161
+ "color": "greenCarbon"
162
+ }
163
+ ]
164
+
165
+ out = Molecule3D(reps=reps)
166
+ run_time = gr.Textbox(label="Runtime")
167
+
168
+ btn.click(predict, inputs=[input_protein_1, input_protein_2, input_fasta_1, input_fasta_2], outputs=[out, run_time])
169
+ with gr.Tab("⚖️ PINDER evaluation template"):
170
+ with gr.Row():
171
+ with gr.Column():
172
+ input_system_id = gr.Textbox(label="PINDER system ID")
173
+ input_prediction_pdb = gr.File(label="Top ranked prediction (PDB with chains R and L)")
174
+
175
+ eval_btn = gr.Button("Run Evaluation")
176
+ gr.Examples(
177
+ [
178
+ [
179
+ "3g9w__A1_Q71LX4--3g9w__D1_P05556",
180
+ "3g9w_R--3g9w_L.pdb",
181
+ ],
182
+ ],
183
+ [input_system_id, input_prediction_pdb],
184
+ )
185
+ reps = [
186
+ {
187
+ "model": 0,
188
+ "style": "cartoon",
189
+ "chain": "R",
190
+ "color": "greenCarbon",
191
+ },
192
+ {
193
+ "model": 0,
194
+ "style": "cartoon",
195
+ "chain": "L",
196
+ "color": "cyanCarbon",
197
+ },
198
+ {
199
+ "model": 1,
200
+ "style": "cartoon",
201
+ "chain": "R",
202
+ "color": "grayCarbon",
203
+ },
204
+ {
205
+ "model": 1,
206
+ "style": "cartoon",
207
+ "chain": "L",
208
+ "color": "blueCarbon",
209
+ },
210
+ ]
211
+
212
+ pred_native = Molecule3D(reps=reps, config={"backgroundColor": "black"})
213
+ eval_run_time = gr.Textbox(label="Evaluation runtime")
214
+ metric_table = gr.DataFrame(pd.DataFrame([], columns=EVAL_METRICS),label="Evaluation metrics")
215
 
216
+ eval_btn.click(evaluate, inputs=[input_system_id, input_prediction_pdb], outputs=[metric_table, pred_native, eval_run_time])
217
 
218
  app.launch()