Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import subprocess | |
import numpy as np | |
import plotly.graph_objects as go | |
from Bio.PDB import PDBParser | |
import io | |
import base64 | |
from typing import Dict, Any, Tuple, List, Optional | |
def create_download_tab(constant: Dict[str, Any]) -> Dict[str, Any]: | |
"""
Build the Gradio "Download" tab and wire up its download buttons.

The body defines two helpers (run_download_script and
visualize_protein_structure), lays out one nested tab per data source
(InterPro metadata, RCSB metadata, UniProt sequences, RCSB structures,
AlphaFold2 structures) and registers a click handler for each download
button.

Args:
constant: Dictionary containing constant values for the application.
It is not referenced anywhere in the visible body of this function.
Returns:
Dictionary containing any state information; the last visible
statement of the file is `return {}`, i.e. an empty dictionary.
""" | |
def run_download_script(script_name: str, **kwargs) -> str:
    """
    Run a crawler script from ``src/crawler`` and report how it went.

    Every keyword argument is translated into command-line arguments:

    * ``None``                 -> omitted
    * ``True`` / ``False``     -> ``--key`` is appended only for ``True``
    * the flag spelled out as the value (``merge="--merge"``,
      ``unzip="--unzip"``)     -> appended once as a bare flag
    * anything else            -> ``--key <str(value)>``

    Args:
        script_name: Path of the script, relative to ``src/crawler``
        **kwargs: Options to forward to the script
    Returns:
        A message starting with "Download completed successfully" followed by
        the script's stdout when it exits with code 0, "Error during download:"
        followed by its stderr for any other exit code, or
        "Failed to run download script: ..." when the process cannot be started.
    """
    import sys  # local import so the block does not depend on a file-level one

    # Run the script with the interpreter that runs the app, so it sees the
    # same installed packages; fall back to "python" if that is unavailable.
    cmd = [sys.executable or "python", f"src/crawler/{script_name}"]
    for key, value in kwargs.items():
        if value is None:  # Skip None values
            continue
        if isinstance(value, bool):  # Handle boolean flags
            if value:
                cmd.append(f"--{key}")
        elif isinstance(value, str) and value in ("--merge", f"--{key}"):
            # The caller passed the flag itself as the value; emit it once
            # instead of "--key --key".
            cmd.append(value)
        else:  # Handle regular arguments
            cmd.extend([f"--{key}", str(value)])
    # Only launching the process is guarded; interpreting the result cannot fail.
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        return f"Failed to run download script: {str(e)}"
    if result.returncode == 0:
        return f"Download completed successfully\n{result.stdout}"
    return f"Error during download:\n{result.stderr}"
# Function to visualize protein structure using Plotly
def visualize_protein_structure(pdb_file: str) -> "Tuple[str, Optional[go.Figure]]":
    """
    Build an interactive 3D Plotly figure for the structure in a PDB file.

    The figure holds, in this order: one line trace per chain running through
    its alpha carbons, one marker trace with every alpha carbon ("Backbone"),
    and one marker trace with every atom ("All Atoms", legend-only at first).
    Two buttons switch between the backbone-only and the all-atom view.
    Every model found in the file is drawn.

    Args:
        pdb_file: Path to the PDB file
    Returns:
        Tuple of (status message, figure). The figure is None when the file
        does not exist or when parsing/plotting raises.
    """
    try:
        if not os.path.exists(pdb_file):
            return f"File not found: {pdb_file}", None
        # Parse the PDB file
        parser = PDBParser(QUIET=True)
        structure = parser.get_structure("protein", pdb_file)
        # Coordinates, hover text and colours for the "All Atoms" trace
        all_atoms_x, all_atoms_y, all_atoms_z = [], [], []
        all_atoms_text = []
        all_atoms_color = []
        # Color mapping for different atom types
        color_map = {
            'C': '#333333',   # Dark gray for carbon
            'N': '#3050F8',   # Blue for nitrogen
            'O': '#FF2010',   # Red for oxygen
            'S': '#FFFF30',   # Yellow for sulfur
            'P': '#FF8000',   # Orange for phosphorus
            'H': '#E0E0E0',   # Light gray for hydrogen
            'CA': '#00FF00',  # Green for alpha carbon
        }
        default_color = '#808080'  # Used for elements missing from color_map
        # Alpha carbons of all chains, for the "Backbone" marker trace
        ca_x, ca_y, ca_z = [], [], []
        ca_text = []
        # Chain id -> index into chain_colors (colours repeat after ten chains)
        chains = {}
        chain_colors = [
            '#1F77B4', '#FF7F0E', '#2CA02C', '#D62728', '#9467BD',
            '#8C564B', '#E377C2', '#7F7F7F', '#BCBD22', '#17BECF',
        ]
        fig = go.Figure()
        # Number of traces that belong to the backbone view; the button
        # visibility lists below rely on these traces being added first.
        backbone_trace_count = 0
        for model in structure:
            for chain in model:
                chain_id = chain.get_id()
                if chain_id not in chains:
                    chains[chain_id] = len(chains) % len(chain_colors)
                chain_color = chain_colors[chains[chain_id]]
                # Alpha carbons of this chain only, for its line trace
                chain_ca_x, chain_ca_y, chain_ca_z = [], [], []
                chain_ca_text = []
                for residue in chain:
                    res_id = residue.get_id()
                    res_name = residue.get_resname()
                    res_num = res_id[1]
                    residue_label = f"Chain {chain_id}, {res_name} {res_num}"
                    if 'CA' in residue:
                        ca_atom = residue['CA']
                        # A calcium ion is stored as a hetero residue whose
                        # single atom is also named "CA". Accept the atom only
                        # for a standard residue (blank hetero flag) or when it
                        # really is a carbon (e.g. modified residues such as
                        # MSE), so ions never end up in the backbone trace.
                        if res_id[0] == ' ' or ca_atom.element == 'C':
                            ca = ca_atom.get_coord()
                            chain_ca_x.append(ca[0])
                            chain_ca_y.append(ca[1])
                            chain_ca_z.append(ca[2])
                            chain_ca_text.append(residue_label)
                            # Also add to global CA lists
                            ca_x.append(ca[0])
                            ca_y.append(ca[1])
                            ca_z.append(ca[2])
                            ca_text.append(residue_label)
                    # Extract all atoms
                    for atom in residue:
                        coord = atom.get_coord()
                        all_atoms_x.append(coord[0])
                        all_atoms_y.append(coord[1])
                        all_atoms_z.append(coord[2])
                        atom_name = atom.get_name()
                        atom_element = atom.element
                        all_atoms_text.append(f"{residue_label}, {atom_name}")
                        # Atoms named CA get the alpha-carbon colour, all
                        # others are coloured by element.
                        color_key = 'CA' if atom_name == 'CA' else atom_element
                        all_atoms_color.append(color_map.get(color_key, default_color))
                # Add this chain's CA atoms as a separate trace for better visualization
                if chain_ca_x:
                    fig.add_trace(go.Scatter3d(
                        x=chain_ca_x,
                        y=chain_ca_y,
                        z=chain_ca_z,
                        mode='lines',
                        name=f'Chain {chain_id}',
                        line=dict(color=chain_color, width=8),
                        text=chain_ca_text,
                        hoverinfo='text',
                        showlegend=True,
                    ))
                    backbone_trace_count += 1
        # Add backbone trace (CA atoms as markers)
        fig.add_trace(go.Scatter3d(
            x=ca_x,
            y=ca_y,
            z=ca_z,
            mode='markers',
            name='Backbone',
            marker=dict(
                size=7,
                color='#00FF00',
                opacity=0.8,
                symbol='circle',
            ),
            text=ca_text,
            hoverinfo='text',
            showlegend=True,
        ))
        backbone_trace_count += 1
        # Add all atoms as small markers
        fig.add_trace(go.Scatter3d(
            x=all_atoms_x,
            y=all_atoms_y,
            z=all_atoms_z,
            mode='markers',
            name='All Atoms',
            marker=dict(
                size=2.5,
                color=all_atoms_color,
                opacity=0.6,
            ),
            text=all_atoms_text,
            hoverinfo='text',
            showlegend=True,
            visible='legendonly',  # Hide by default, can be toggled in legend
        ))
        # The title is the file name up to its first dot
        pdb_id = os.path.basename(pdb_file).split('.')[0]
        axis_style = dict(showbackground=False, showgrid=True, gridcolor='lightgray')
        fig.update_layout(
            title=dict(
                text=f"Structure: {pdb_id}",
                font=dict(size=20, family="Arial, sans-serif"),
            ),
            scene=dict(
                xaxis=dict(title='X (Å)', **axis_style),
                yaxis=dict(title='Y (Å)', **axis_style),
                zaxis=dict(title='Z (Å)', **axis_style),
                aspectmode='data',
                camera=dict(
                    eye=dict(x=1.5, y=1.5, z=1.5)
                ),
            ),
            margin=dict(l=0, r=0, b=0, t=40),
            legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=0.01,
                bgcolor="rgba(255, 255, 255, 0.8)",
                bordercolor="lightgray",
                borderwidth=1,
            ),
            template="plotly_white",
            height=600,
            width=800,
        )
        # One visibility flag per trace, in the order the traces were added:
        # "Backbone Only" shows the backbone traces and hides the all-atom one,
        # "All Atoms" shows everything.
        backbone_only_visibility = [True] * backbone_trace_count + [False]
        all_atoms_visibility = [True] * (backbone_trace_count + 1)
        # Add buttons for different views
        fig.update_layout(
            updatemenus=[
                dict(
                    type="buttons",
                    direction="right",
                    buttons=[
                        dict(
                            args=[{"visible": backbone_only_visibility}],
                            label="Backbone Only",
                            method="update",
                        ),
                        dict(
                            args=[{"visible": all_atoms_visibility}],
                            label="All Atoms",
                            method="update",
                        ),
                    ],
                    pad={"r": 10, "t": 10},
                    showactive=True,
                    x=0.1,
                    xanchor="left",
                    y=1.1,
                    yanchor="top",
                    bgcolor="rgba(255, 255, 255, 0.8)",
                    bordercolor="lightgray",
                    borderwidth=1,
                ),
            ]
        )
        return f"Successfully visualized structure from {pdb_file}", fig
    except Exception as e:
        # Boundary for the UI: print the full traceback and hand a short
        # message back to the caller instead of raising.
        import traceback
        error_details = traceback.format_exc()
        print(f"Error visualizing structure: {str(e)}\n{error_details}")
        return f"Error visualizing structure: {str(e)}", None
# Create the main download tab; each data source below gets its own nested tab.
# NOTE(review): indentation was lost in this copy of the file, so how the
# Row/Column containers nest inside each other cannot be confirmed from here.
with gr.Tab("Download"): | |
with gr.Row(): | |
with gr.Column(): | |
gr.Markdown("### Download Protein Data (See help for more details)") | |
# InterPro Metadata tab
with gr.Tab("InterPro Metadata"): | |
with gr.Row(): | |
# Selects which of the two source inputs below is shown
# (toggled by update_interpro_visibility).
interpro_method = gr.Radio( | |
choices=["Single ID", "From JSON"], | |
label="Download Method", | |
value="Single ID" | |
) | |
with gr.Column(): | |
interpro_id = gr.Textbox(label="InterPro ID", value="IPR000001") | |
# Created hidden because "Single ID" is the initial method.
interpro_json = gr.Textbox(label="InterPro JSON Path", value="download/interpro_domain/interpro_json.customization", visible=False) | |
interpro_out = gr.Textbox(label="Output Directory", value="download/interpro_domain") | |
# When checked, handle_interpro_download passes "<output dir>/failed.txt"
# to the script as error_file; otherwise no error file is requested.
interpro_error = gr.Checkbox(label="Save error file", value=True) | |
interpro_btn = gr.Button("Download InterPro Data") | |
# Read-only box that receives the message returned by the click handler.
interpro_output = gr.Textbox(label="Output", interactive=False) | |
def update_interpro_visibility(method):
    """Show the InterPro input that matches the chosen method and hide the other."""
    single_id_selected = method == "Single ID"
    json_selected = method == "From JSON"
    return {
        interpro_id: gr.update(visible=single_id_selected),
        interpro_json: gr.update(visible=json_selected),
    }


# Re-evaluate the visibility of both inputs whenever the radio selection changes.
interpro_method.change(fn=update_interpro_visibility, inputs=[interpro_method], outputs=[interpro_id, interpro_json])
# RCSB Metadata tab
with gr.Tab("RCSB Metadata"): | |
with gr.Row(): | |
# Selects which of the two source inputs below is shown
# (toggled by update_rcsb_visibility).
rcsb_method = gr.Radio( | |
choices=["Single ID", "From File"], | |
label="Download Method", | |
value="Single ID" | |
) | |
with gr.Column(): | |
rcsb_id = gr.Textbox(label="PDB ID", value="1a0j") | |
# Created hidden because "Single ID" is the initial method.
rcsb_file = gr.Textbox(label="PDB List File", value="download/rcsb.txt", visible=False) | |
rcsb_out = gr.Textbox(label="Output Directory", value="download/rcsb_metadata") | |
# When checked, handle_rcsb_download passes "<output dir>/failed.txt"
# to the script as error_file.
rcsb_error = gr.Checkbox(label="Save error file", value=True) | |
rcsb_btn = gr.Button("Download RCSB Metadata") | |
# Read-only box that receives the message returned by the click handler.
rcsb_output = gr.Textbox(label="Output", interactive=False) | |
def update_rcsb_visibility(method):
    """Show the RCSB metadata input that matches the chosen method and hide the other."""
    show_id_box = method == "Single ID"
    show_file_box = method == "From File"
    visibility = {}
    visibility[rcsb_id] = gr.update(visible=show_id_box)
    visibility[rcsb_file] = gr.update(visible=show_file_box)
    return visibility


# Re-evaluate the visibility of both inputs whenever the radio selection changes.
rcsb_method.change(fn=update_rcsb_visibility, inputs=[rcsb_method], outputs=[rcsb_id, rcsb_file])
# UniProt Sequences tab
with gr.Tab("UniProt Sequences"): | |
with gr.Row(): | |
# Selects which of the two source inputs below is shown
# (toggled by update_uniprot_visibility).
uniprot_method = gr.Radio( | |
choices=["Single ID", "From File"], | |
label="Download Method", | |
value="Single ID" | |
) | |
with gr.Column(): | |
uniprot_id = gr.Textbox(label="UniProt ID", value="P00734") | |
# Created hidden because "Single ID" is the initial method.
uniprot_file = gr.Textbox(label="UniProt ID List File", value="download/uniprot.txt", visible=False) | |
uniprot_out = gr.Textbox(label="Output Directory", value="download/uniprot_sequences") | |
# When checked, handle_uniprot_download forwards the "--merge" flag to the script.
uniprot_merge = gr.Checkbox(label="Merge into single FASTA", value=False) | |
# When checked, the handler passes "<output dir>/failed.txt" as error_file.
uniprot_error = gr.Checkbox(label="Save error file", value=True) | |
uniprot_btn = gr.Button("Download UniProt Sequences") | |
# Read-only box that receives the message returned by the click handler.
uniprot_output = gr.Textbox(label="Output", interactive=False) | |
def update_uniprot_visibility(method):
    """Show the UniProt input that matches the chosen method and hide the other."""
    id_update = gr.update(visible=(method == "Single ID"))
    file_update = gr.update(visible=(method == "From File"))
    return {uniprot_id: id_update, uniprot_file: file_update}


# Re-evaluate the visibility of both inputs whenever the radio selection changes.
uniprot_method.change(fn=update_uniprot_visibility, inputs=[uniprot_method], outputs=[uniprot_id, uniprot_file])
# RCSB Structures tab: download inputs on the left, 3D plot on the right
with gr.Tab("RCSB Structures"): | |
with gr.Row(): | |
# Left column for inputs
with gr.Column(scale=3): | |
with gr.Group(): # Group for better visual separation
# Selects which of the two source inputs below is shown
# (toggled by update_struct_visibility).
struct_method = gr.Radio( | |
choices=["Single ID", "From File"], | |
label="Download Method", | |
value="Single ID" | |
) | |
# Input parameters section with consistent spacing
with gr.Row(): | |
struct_id = gr.Textbox(label="PDB ID", value="1a0j") | |
with gr.Row(): | |
# Created hidden because "Single ID" is the initial method.
struct_file = gr.Textbox(label="PDB List File", value="download/rcsb.txt", visible=False) | |
with gr.Row(): | |
struct_out = gr.Textbox(label="Output Directory", value="download/rcsb_structures") | |
with gr.Row(): | |
# handle_struct_download only attempts a visualization for the "pdb" type.
struct_type = gr.Dropdown( | |
choices=["cif", "pdb", "pdb1", "xml", "sf", "mr", "mrstr"], | |
value="pdb", | |
label="Structure Type" | |
) | |
with gr.Row(): | |
with gr.Column(scale=1): | |
struct_unzip = gr.Checkbox(label="Unzip downloaded files", value=True) | |
with gr.Column(scale=1): | |
# When checked, the handler passes "<output dir>/failed.txt" as error_file.
struct_error = gr.Checkbox(label="Save error file", value=True) | |
with gr.Row(): | |
struct_btn = gr.Button("Download RCSB Structures", size="lg") | |
# Output section: script output and the status message of the visualization step
struct_output = gr.Textbox(label="Download Output", interactive=False, lines=4) | |
struct_viz_status = gr.Textbox(label="Visualization Status", interactive=False) | |
# Right column for visualization
with gr.Column(scale=5): | |
# Receives the Plotly figure (or None) returned by handle_struct_download
struct_viz = gr.Plot(label="Structure Visualization", elem_id="struct_viz_plot") | |
def update_struct_visibility(method):
    """Show the RCSB structure input that matches the chosen method and hide the other."""
    wants_single_id = method == "Single ID"
    wants_list_file = method == "From File"
    return {
        struct_id: gr.update(visible=wants_single_id),
        struct_file: gr.update(visible=wants_list_file),
    }


# Re-evaluate the visibility of both inputs whenever the radio selection changes.
struct_method.change(fn=update_struct_visibility, inputs=[struct_method], outputs=[struct_id, struct_file])
# AlphaFold2 Structures tab: download inputs on the left, 3D plot on the right
with gr.Tab("AlphaFold2 Structures"): | |
with gr.Row(): | |
# Left column for inputs
with gr.Column(scale=3): | |
with gr.Group(): # Group for better visual separation
# Selects which of the two source inputs below is shown
# (toggled by update_af_visibility).
af_method = gr.Radio( | |
choices=["Single ID", "From File"], | |
label="Download Method", | |
value="Single ID" | |
) | |
# Input parameters section with consistent spacing
with gr.Row(): | |
af_id = gr.Textbox(label="UniProt ID", value="P00734") | |
with gr.Row(): | |
# Created hidden because "Single ID" is the initial method.
af_file = gr.Textbox(label="UniProt ID List File", value="download/uniprot.txt", visible=False) | |
with gr.Row(): | |
af_out = gr.Textbox(label="Output Directory", value="download/alphafold2_structures") | |
with gr.Row(): | |
# Forwarded to the download script as index_level by handle_af_download.
# NOTE(review): precision=0 presumably makes Gradio deliver an integer - confirm.
af_index_level = gr.Number(label="Index Level", value=0, precision=0) | |
with gr.Row(): | |
# When checked, the handler passes "<output dir>/failed.txt" as error_file.
af_error = gr.Checkbox(label="Save error file", value=True) | |
with gr.Row(): | |
af_btn = gr.Button("Download AlphaFold Structures", size="lg") | |
# Output section: script output and the status message of the visualization step
af_output = gr.Textbox(label="Download Output", interactive=False, lines=4) | |
af_viz_status = gr.Textbox(label="Visualization Status", interactive=False) | |
# Right column for visualization
with gr.Column(scale=5): | |
# Receives the Plotly figure (or None) returned by handle_af_download
af_viz = gr.Plot(label="Structure Visualization", elem_id="af_viz_plot") | |
def update_af_visibility(method):
    """Show the AlphaFold input that matches the chosen method and hide the other."""
    id_visible = method == "Single ID"
    file_visible = method == "From File"
    updates = {af_id: gr.update(visible=id_visible)}
    updates[af_file] = gr.update(visible=file_visible)
    return updates


# Re-evaluate the visibility of both inputs whenever the radio selection changes.
af_method.change(fn=update_af_visibility, inputs=[af_method], outputs=[af_id, af_file])
# Handler functions for download buttons
def handle_interpro_download(method, id_val, json_val, out_dir, error):
    """
    Run the InterPro metadata download script.

    Args:
        method: "Single ID" to download id_val; any other value uses json_val
        id_val: InterPro ID for a single download
        json_val: JSON path handed to the script as interpro_json
        out_dir: Output directory
        error: Whether to request an error file
    Returns:
        The message returned by run_download_script
    """
    # The error file is only requested when the checkbox is ticked.
    error_file = f"{out_dir}/failed.txt" if error else None
    # Only the keyword that names the source differs between the two modes.
    if method == "Single ID":
        source = {"interpro_id": id_val}
    else:
        source = {"interpro_json": json_val}
    return run_download_script(
        "metadata/download_interpro.py",
        **source,
        out_dir=out_dir,
        error_file=error_file,
    )


interpro_btn.click(
    fn=handle_interpro_download,
    inputs=[interpro_method, interpro_id, interpro_json, interpro_out, interpro_error],
    outputs=interpro_output,
)
def handle_rcsb_download(method, id_val, file_val, out_dir, error):
    """
    Run the RCSB metadata download script.

    Args:
        method: "Single ID" to download id_val; any other value uses file_val
        id_val: PDB ID for a single download
        file_val: Path handed to the script as pdb_id_file
        out_dir: Output directory
        error: Whether to request an error file
    Returns:
        The message returned by run_download_script
    """
    script = "metadata/download_rcsb.py"
    failed_path = None
    if error:
        failed_path = f"{out_dir}/failed.txt"
    if method != "Single ID":
        return run_download_script(script, pdb_id_file=file_val, out_dir=out_dir, error_file=failed_path)
    return run_download_script(script, pdb_id=id_val, out_dir=out_dir, error_file=failed_path)


rcsb_btn.click(
    fn=handle_rcsb_download,
    inputs=[rcsb_method, rcsb_id, rcsb_file, rcsb_out, rcsb_error],
    outputs=rcsb_output,
)
def handle_uniprot_download(method, id_val, file_val, out_dir, merge, error):
    """
    Run the UniProt sequence download script.

    Args:
        method: "Single ID" to download id_val; any other value uses file_val
        id_val: UniProt ID for a single download
        file_val: Path handed to the script as file
        out_dir: Output directory
        merge: Whether to forward the "--merge" flag
        error: Whether to request an error file
    Returns:
        The message returned by run_download_script
    """
    # Options shared by both modes, in the order they are forwarded.
    shared_options = {
        "out_dir": out_dir,
        "merge": "--merge" if merge else None,
        "error_file": f"{out_dir}/failed.txt" if error else None,
    }
    # The source keyword comes first so the forwarded order stays unchanged.
    source_key, source_value = ("uniprot_id", id_val) if method == "Single ID" else ("file", file_val)
    options = {source_key: source_value, **shared_options}
    return run_download_script("sequence/download_uniprot_seq.py", **options)


uniprot_btn.click(
    fn=handle_uniprot_download,
    inputs=[uniprot_method, uniprot_id, uniprot_file, uniprot_out, uniprot_merge, uniprot_error],
    outputs=uniprot_output,
)
def handle_struct_download(method, id_val, file_val, out_dir, type_val, unzip, error):
    """
    Handle RCSB structure download and visualization

    Args:
        method: Download method (Single ID or From File)
        id_val: PDB ID for single download
        file_val: File path for batch download
        out_dir: Output directory
        type_val: Structure file type
        unzip: Whether to unzip downloaded files
        error: Whether to save error file
    Returns:
        Tuple containing download output, visualization status, and Plotly
        figure (None whenever nothing was visualized)
    """
    # Options shared by both modes, in the order they are forwarded.
    shared_options = dict(
        out_dir=out_dir,
        type=type_val,
        # Pass a real boolean: run_download_script turns True into a single
        # "--unzip" flag, whereas the string "--unzip" was forwarded as
        # "--unzip --unzip".
        unzip=bool(unzip),
        error_file=f"{out_dir}/failed.txt" if error else None,
    )
    if method != "Single ID":
        # Batch mode: download only, there is no single structure to show.
        download_output = run_download_script(
            "structure/download_rcsb.py",
            pdb_id_file=file_val,
            **shared_options,
        )
        return download_output, "Batch download completed, select a single ID to visualize", None

    download_output = run_download_script(
        "structure/download_rcsb.py",
        pdb_id=id_val,
        **shared_options,
    )
    if "Download completed successfully" not in download_output:
        return download_output, "Download failed, cannot visualize", None

    # Visualize the downloaded structure; only plain .pdb files are supported
    # and the file is looked up as "<out_dir>/<lower-cased id>.<type>".
    pdb_file = f"{out_dir}/{id_val.lower()}.{type_val}"
    if type_val == "pdb" and os.path.exists(pdb_file):
        viz_status, viz_fig = visualize_protein_structure(pdb_file)
        return download_output, viz_status, viz_fig
    return download_output, f"Cannot visualize {type_val} format or file not found", None


struct_btn.click(
    fn=handle_struct_download,
    inputs=[struct_method, struct_id, struct_file, struct_out, struct_type, struct_unzip, struct_error],
    outputs=[struct_output, struct_viz_status, struct_viz],
)
def handle_af_download(method, id_val, file_val, out_dir, index_level, error):
    """
    Download AlphaFold structures and, for a single ID, plot the result.

    Args:
        method: Download method (Single ID or From File)
        id_val: UniProt ID for single download
        file_val: File path for batch download
        out_dir: Output directory
        index_level: Value forwarded to the script as index_level
        error: Whether to save error file
    Returns:
        Tuple containing download output, visualization status, and Plotly
        figure (None whenever nothing was visualized)
    """
    script = "structure/download_alphafold.py"
    error_file = f"{out_dir}/failed.txt" if error else None

    if method != "Single ID":
        # Batch mode: download only, there is no single structure to show.
        download_output = run_download_script(
            script,
            uniprot_id_file=file_val,
            out_dir=out_dir,
            index_level=index_level,
            error_file=error_file,
        )
        return download_output, "Batch download completed, select a single ID to visualize", None

    download_output = run_download_script(
        script,
        uniprot_id=id_val,
        out_dir=out_dir,
        index_level=index_level,
        error_file=error_file,
    )
    if "Download completed successfully" not in download_output:
        return download_output, "Download failed, cannot visualize", None

    # Try the two file names the download may have produced, in this order,
    # and plot the first one that exists.
    candidates = (
        f"{out_dir}/AF-{id_val}-F1-model_v4.pdb",
        f"{out_dir}/{id_val}.pdb",
    )
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        return download_output, "PDB file not found in expected locations", None
    viz_status, viz_fig = visualize_protein_structure(found)
    return download_output, viz_status, viz_fig


af_btn.click(
    fn=handle_af_download,
    inputs=[af_method, af_id, af_file, af_out, af_index_level, af_error],
    outputs=[af_output, af_viz_status, af_viz],
)
return {} | |