Spaces:
Runtime error
Runtime error
import os | |
import argparse | |
import json | |
from tqdm import tqdm | |
# conda install -c conda-forge -c bioconda foldseek | |
def get_foldseek_structure_seq(pdb_dir, rm_tmp=True): | |
# foldseek createdb INPUT_dir_with_structures tmp_db | |
# foldseek lndb tmp_db_h tmp_db_ss_h | |
# foldseek convert2fasta tmp_db_ss OUTPUT_3di.fasta | |
# use command to generate foldseek structure seq | |
os.makedirs("tmp_db", exist_ok=True) | |
os.system(f"foldseek createdb {pdb_dir} tmp_db/tmp_db") | |
os.system(f"foldseek lndb tmp_db/tmp_db_h tmp_db/tmp_db_ss_h") | |
os.system(f"foldseek convert2fasta tmp_db/tmp_db_ss tmp_db/tmp_db_ss.fasta") | |
results = [] | |
# read fasta file | |
with open("tmp_db/tmp_db_ss.fasta", "r") as f: | |
for line in tqdm(f): | |
if line.startswith(">"): | |
name = line.split()[0][1:] | |
seq = next(f).strip() | |
results.append({"name":name.split('.')[0], "foldseek_seq":seq}) | |
if rm_tmp: | |
os.system("rm -rf tmp_db") | |
return results | |
if __name__ == '__main__': | |
parser = argparse.ArgumentParser() | |
parser.add_argument("--pdb_dir", type=str, default=None) | |
parser.add_argument("--out_file", type=str, default=None) | |
parser.add_argument("--rm_tmp", type=bool, default=True) | |
args = parser.parse_args() | |
results = get_foldseek_structure_seq(args.pdb_dir, args.rm_tmp) | |
with open(args.out_file, "w") as f: | |
f.write("\n".join([json.dumps(r) for r in results])) |