# Hugging Face Space (status: Running)
#-------------------------------------------------libraries--------------------------------------------------------------------------------------------------------------- | |
import torch | |
import numpy as np | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
from transformers import AutoTokenizer, EsmModel | |
from sklearn.metrics.pairwise import cosine_similarity | |
from Bio.PDB import PDBParser, PDBIO | |
from Bio.PDB.StructureBuilder import StructureBuilder | |
import tempfile | |
import os | |
#----------------------------------------------------Analysis-------------------------------------------------------------------------------------------------------- | |
# Load the ESM-1b model and its tokenizer once at import time; both come from
# the same pretrained checkpoint and are shared by every scoring request.
ESM_CHECKPOINT = "facebook/esm1b_t33_650M_UR50S"
model = EsmModel.from_pretrained(ESM_CHECKPOINT, output_hidden_states=True)
tokenizer = AutoTokenizer.from_pretrained(ESM_CHECKPOINT)
def compute_residue_scores(seq):
    """Score each residue by its summed embedding similarity to all residues.

    The sequence is embedded with the module-level ``model`` and
    ``tokenizer``. A pairwise cosine-similarity matrix is computed over the
    per-residue embeddings, each row is summed, and the sums are min-max
    scaled to the range 0-100.

    Args:
        seq: Protein sequence in 1-letter code. Whitespace (spaces, tabs,
            line breaks from a pasted sequence) is ignored.

    Returns:
        A 1-D NumPy array with one score per residue, scaled to [0, 100].
        When every residue has the same raw score (for example a
        single-residue sequence) all scores are 0 instead of NaN.

    Raises:
        ValueError: If ``seq`` contains no residues.
    """
    # len(seq) is used below to slice out the residue rows, so it must not
    # count whitespace characters that are not residues.
    seq = "".join((seq or "").split())
    if not seq:
        raise ValueError("Input protein sequence is empty.")

    inputs = tokenizer(seq, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # The first and last rows belong to the special tokens added around the
    # sequence (shape (L+2, d)); keep only the L residue rows.
    n_residues = len(seq)
    embedding = outputs.last_hidden_state[0][1:n_residues + 1]

    sim_matrix = cosine_similarity(embedding.detach().cpu().numpy())
    residue_scores = np.sum(sim_matrix, axis=1)

    lowest = np.min(residue_scores)
    span = np.max(residue_scores) - lowest
    if span == 0:
        # All residues scored identically: min-max scaling is undefined
        # (0 / 0), so report a flat profile rather than NaN.
        return np.zeros_like(residue_scores)
    return 100 * (residue_scores - lowest) / span
def inject_bfactors_into_pdb(pdb_file, scores):
    """Write a copy of a PDB file whose B-factor column holds residue scores.

    Residues are visited in structure order (every model, then chain, then
    residue) and the i-th residue visited receives ``scores[i]`` on all of
    its atoms. Residues beyond ``len(scores)`` keep their original
    B-factors.

    Args:
        pdb_file: Path to the PDB file, or an object exposing that path as a
            ``name`` attribute (such as an uploaded-file wrapper).
        scores: Sequence of per-residue scores, one value per residue.

    Returns:
        Path of a newly created temporary ``.pdb`` file. The file is not
        deleted automatically.

    Raises:
        ValueError: If ``pdb_file`` is ``None`` (no file supplied).
    """
    if pdb_file is None:
        raise ValueError("No PDB file was provided.")
    # Accept both a plain path string and a file-like wrapper with `.name`.
    pdb_path = getattr(pdb_file, "name", pdb_file)

    structure = PDBParser(QUIET=True).get_structure("prot", pdb_path)

    # NOTE(review): scores are matched to residues purely by position; the
    # residue count is not checked against len(scores), and hetero residues
    # and additional models are counted like any other residue - confirm this
    # is intended for multi-model or ligand-containing files.
    for residue, score in zip(structure.get_residues(), scores):
        bfactor = float(score)
        for atom in residue:
            atom.bfactor = bfactor

    # Reserve a unique output path and close the handle right away so the
    # writer below can reopen the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp:
        out_path = tmp.name

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(out_path)
    return out_path
def process(seq, pdb_file):
    """Score a sequence and return the path of the PDB annotated with it.

    Args:
        seq: Protein sequence passed to ``compute_residue_scores``.
        pdb_file: Uploaded PDB file passed to ``inject_bfactors_into_pdb``.

    Returns:
        Whatever ``inject_bfactors_into_pdb`` returns for the computed
        scores (the path of the rewritten PDB file).
    """
    return inject_bfactors_into_pdb(pdb_file, compute_residue_scores(seq))
# Gradio interface: a sequence text box and a PDB upload go in, the rewritten
# PDB file comes out.
sequence_input = gr.Textbox(label="Input Protein Sequence (1-letter code)")
pdb_input = gr.File(label="Upload PDB File", file_types=[".pdb"])
scored_pdb_output = gr.File(label="Modified PDB with Scores in B-factor Column")

demo = gr.Interface(
    fn=process,
    inputs=[sequence_input, pdb_input],
    outputs=scored_pdb_output,
    title="ESM-1b Residue Scoring: B-factor Injection for Structural Visualization",
)
demo.launch()