"""Flask service that generates candidate SMILES strings for a protein sequence.

At import time the module creates its working directories, downloads the
ProtTrans-BERT-BFD model files listed in ``DROPBOX_LINKS`` and tries to load
the embedder.  A POST to ``/`` embeds the submitted sequence, samples molecular
graphs from ``CVanilla_RNN_Builder``, converts them to SMILES with RDKit and
writes them to ``SMILES_GENERATED.txt``; ``/download`` serves that file.
"""
import os
import time

# 🚀 Define Directories for Railway
bio_model_dir = "/app/modelsBioembed"  # Persistent model storage
cvn_model_dir = "/app/models_folder"
UPLOAD_FOLDER = "/app/Samples"

for _directory in (bio_model_dir, cvn_model_dir, UPLOAD_FOLDER):
    os.makedirs(_directory, exist_ok=True)

# ✅ Environment Variables for Temp Directory
# These are exported *before* the third-party imports below on purpose:
# libraries such as transformers and numba read their cache settings when they
# are first imported, so assigning the variables afterwards has no effect.
os.environ["TMPDIR"] = bio_model_dir
os.environ["TEMP"] = bio_model_dir
os.environ["TMP"] = bio_model_dir
os.environ['NUMBA_CACHE_DIR'] = '/app/numba_cache'
os.environ['TRANSFORMERS_CACHE'] = '/app/hf_cache'

import numpy as np  # noqa: E402
import requests  # noqa: E402
from flask import Flask, abort, render_template, request, send_file  # noqa: E402
from rdkit import Chem  # noqa: E402
from transformers import AutoModelForMaskedLM, AutoTokenizer  # noqa: E402,F401
from bio_embeddings.embed import ProtTransBertBFDEmbedder  # noqa: E402
from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list  # noqa: E402

# 🔗 Dropbox Links for Model Files
DROPBOX_LINKS = {
    "pytorch_model.bin": "https://www.dropbox.com/scl/fi/b41t8c6ji7j6uk5y2jj8g/pytorch_model.bin?rlkey=kuuwkid36ugml560c4a465ilr&st=t60bfemx&dl=1",
    "config.json": "https://www.dropbox.com/scl/fi/js6czj3kfc4a5kshfkzie/config.json?rlkey=5oysq4ecilnan5tviuqe86v93&st=75zpce8h&dl=1",
    "tokenizer_config.json": "https://www.dropbox.com/scl/fi/x11poym6mueoxod7xb6f1/tokenizer_config.json?rlkey=s51pik2rkmqp1fu99qj9qaria&st=z9kkcxp7&dl=1",
    "vocab.txt": "https://www.dropbox.com/scl/fi/v6e2gn10ck4lpx4iv9kpe/vocab.txt?rlkey=dcu29g5ns4wtqdv0pkks0ehx1&st=qt187rhq&dl=1",
    "special_tokens_map.json": "https://www.dropbox.com/scl/fi/t3lvmp5x28d1zjac3j7ec/special_tokens_map.json?rlkey=z2xbompa54iu4y9qgb5bvmfc9&st=zrxlpjdt&dl=1",
}

# (connect, read) timeouts in seconds.  The read timeout bounds the wait for
# each chunk, not the whole transfer, so large files can still complete.
_DOWNLOAD_TIMEOUT = (10, 300)
# 1 MiB chunks keep the Python-level loop short for the multi-GB weights file.
_DOWNLOAD_CHUNK_SIZE = 1024 * 1024


# 📥 Function to Download Model Files
def download_model_files(force=True):
    """Download every file in ``DROPBOX_LINKS`` into ``bio_model_dir``.

    Each file is streamed to a ``.part`` sibling and moved into place only
    after the transfer finished, so an interrupted download never leaves a
    truncated model file behind (and never destroys a previously good copy).
    Failures are reported on stdout and the loop moves on to the next file.

    Args:
        force: When true (the default) existing files are re-downloaded and
            overwritten.  When false, files already present are skipped.
    """
    for filename, url in DROPBOX_LINKS.items():
        file_path = os.path.join(bio_model_dir, filename)
        if not force and os.path.exists(file_path):
            continue  # Avoid re-downloading

        if force:
            print(f"Downloading {filename} (forcing overwrite)...")
        else:
            print(f"Downloading {filename}...")

        partial_path = file_path + ".part"
        try:
            # The context manager releases the streamed connection even when
            # the body is not read to the end.
            with requests.get(url, stream=True, timeout=_DOWNLOAD_TIMEOUT) as response:
                if response.status_code != 200:
                    print(f"Failed to download {filename}")
                    continue
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=_DOWNLOAD_CHUNK_SIZE):
                        f.write(chunk)
            os.replace(partial_path, file_path)
        except requests.RequestException as e:
            print(f"Failed to download {filename}: {e}")
            continue
        finally:
            # After a successful os.replace the partial file is already gone;
            # this only cleans up after a failed or interrupted transfer.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print(f"Downloaded: {filename}")


# 📥 Download models before starting
download_model_files()

# # ✅ Load ProtTrans-BERT-BFD Model
# print("Loading ProtTrans-BERT-BFD model...")
# model = AutoModelForMaskedLM.from_pretrained(bio_model_dir)
# tokenizer = AutoTokenizer.from_pretrained(bio_model_dir)

# ✅ Load Bio-Embedding Model
# A load failure must not stop the web app from starting; it is reported to the
# user later through generate_smiles() ("Embedding generation failed!").
try:
    print("Loading ProtTrans-BERT-BFD model...")
    embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir)
except Exception as e:
    print(f"Error loading ProtTrans-BERT-BFD model: {e}")
    embedder = None


# 🧬 Generate Bio-Embeddings
def generate_bio_embeddings(sequence):
    """Return the per-protein embedding of ``sequence`` as a single-row array.

    Args:
        sequence: Protein sequence string passed to ``embedder.embed``.

    Returns:
        A NumPy array reshaped to ``(1, -1)``, or ``None`` when the embedder
        failed to load or embedding raised an exception.
    """
    if embedder is None:
        return None
    try:
        embedding_protein = embedder.embed(sequence)
        embedding_per_protein = embedder.reduce_per_protein(embedding_protein)
        return np.array(embedding_per_protein).reshape(1, -1)
    except Exception as e:
        print(f"Embedding Error: {e}")
        return None


# 🔬 Generate SMILES from Protein Sequence
def generate_smiles(sequence, n_samples=100):
    """Sample molecules conditioned on ``sequence`` and save them as SMILES.

    Args:
        sequence: Protein sequence string.
        n_samples: Number of samples requested from the generative model.

    Returns:
        ``(path, elapsed_seconds)`` on success, where ``path`` is the text file
        holding one SMILES string per line, or ``(None, error_message)`` when
        embedding failed or no valid molecule was produced.
    """
    start_time = time.time()

    protein_embedding = generate_bio_embeddings(sequence)
    if protein_embedding is None:
        return None, "Embedding generation failed!"

    model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None)
    # The embedding has a single row; that row is the conditioning vector.
    samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph')
    valid_samples = [sample for sample in samples if sample is not None]

    smiles_list = [
        Chem.MolToSmiles(mol)
        for mol in get_mol_from_graph_list(valid_samples, sanitize=True)
        if mol is not None
    ]
    if not smiles_list:
        return None, "No valid SMILES generated!"

    filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt")
    with open(filename, "w", encoding="utf-8") as file:
        file.write("\n".join(smiles_list))

    elapsed_time = time.time() - start_time
    return filename, elapsed_time


# 🌐 Flask Web App
app = Flask(__name__)


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the form; on POST, run generation and report the outcome."""
    if request.method == "POST":
        # .get() so that a POST without the field shows the same validation
        # message as an empty field instead of a bare 400 response.
        sequence = request.form.get("sequence", "").strip()
        if not sequence:
            return render_template("index.html", message="Please enter a valid sequence.")

        file_path, result = generate_smiles(sequence)
        if file_path is None:
            return render_template("index.html", message=f"Error: {result}")

        return render_template(
            "index.html",
            message="SMILES generated successfully!",
            file_path=file_path,
            time_taken=result,
        )

    return render_template("index.html")


@app.route("/download")
def download_file():
    """Send the most recently generated SMILES file as an attachment."""
    file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt")
    if not os.path.isfile(file_path):
        # Nothing has been generated yet: answer 404 rather than a 500.
        abort(404)
    return send_file(file_path, as_attachment=True)


# 🚀 Run the Flask App on Railway
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)

# import os
# import time
# import requests
# import numpy as np
# import subprocess
# from flask import Flask, render_template, request, send_file
# from rdkit import Chem
# from transformers import AutoModel
# from bio_embeddings.embed import ProtTransBertBFDEmbedder
# from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list
# # DROPBOX LINKS FOR MODEL FILES
# DROPBOX_LINKS = {
#     "pytorch_model.bin": "https://www.dropbox.com/scl/fi/b41t8c6ji7j6uk5y2jj8g/pytorch_model.bin?rlkey=kuuwkid36ugml560c4a465ilr&st=t60bfemx&dl=1",
#     "config.json":
"https://www.dropbox.com/scl/fi/js6czj3kfc4a5kshfkzie/config.json?rlkey=5oysq4ecilnan5tviuqe86v93&st=75zpce8h&dl=1", # "tokenizer_config.json": "https://www.dropbox.com/scl/fi/x11poym6mueoxod7xb6f1/tokenizer_config.json?rlkey=s51pik2rkmqp1fu99qj9qaria&st=z9kkcxp7&dl=1", # "vocab.txt": "https://www.dropbox.com/scl/fi/v6e2gn10ck4lpx4iv9kpe/vocab.txt?rlkey=dcu29g5ns4wtqdv0pkks0ehx1&st=qt187rhq&dl=1", # "special_tokens_map.json": "https://www.dropbox.com/scl/fi/t3lvmp5x28d1zjac3j7ec/special_tokens_map.json?rlkey=z2xbompa54iu4y9qgb5bvmfc9&st=zrxlpjdt&dl=1" # } # # LOCAL DIRECTORIES # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # UPLOAD_FOLDER = "Samples" # os.makedirs(bio_model_dir, exist_ok=True) # os.makedirs(cvn_model_dir, exist_ok=True) # os.makedirs(UPLOAD_FOLDER, exist_ok=True) # os.environ["TMPDIR"] = bio_model_dir # os.environ["TEMP"] = bio_model_dir # os.environ["TMP"] = bio_model_dir # # FUNCTION TO DOWNLOAD FILES FROM DROPBOX # for file_name, url in DROPBOX_LINKS.items(): # file_path = os.path.join(bio_model_dir, file_name) # if not os.path.exists(file_path): # print(f"Downloading {file_name} from Dropbox...") # subprocess.run(["wget", "-O", file_path, url], check=True) # print(f"{file_name} downloaded!") # # BIO-EMBEDDING MODEL LOADING # try: # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # except Exception as e: # print(f"Error loading ProtTrans-BERT-BFD model: {e}") # embedder = None # def generate_bio_embeddings(sequence): # if embedder is None: # return None # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return np.array(embedding_per_protein).reshape(1, -1) # except Exception as e: # print(f"Embedding Error: {e}") # return None # def generate_smiles(sequence, n_samples=100): # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if 
protein_embedding is None: # return None, "Embedding generation failed!" # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # app = Flask(__name__) # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000, debug=True) # import os # import time # import numpy as np # from flask import Flask, render_template, request, send_file # from rdkit import Chem # from transformers import AutoModel # from bio_embeddings.embed import ProtTransBertBFDEmbedder # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list # # # DIRECTORIES # # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings # # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder # 
#bio_model_dir = os.getenv("BIO_MODEL_DIR", "modelsBioembed") # bio_model_dir = "/app/modelsBioembed" # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") # os.makedirs(bio_model_dir, exist_ok=True) # os.makedirs(cvn_model_dir, exist_ok=True) # os.environ["TMPDIR"] = bio_model_dir # os.environ["TEMP"] = bio_model_dir # os.environ["TMP"] = bio_model_dir # UPLOAD_FOLDER = "Samples" # os.makedirs(UPLOAD_FOLDER, exist_ok=True) # app = Flask(__name__) # # model_path = os.path.join(bio_model_dir, "pytorch_model.bin") # # if not os.path.exists(model_path): # # print("Downloading ProtTrans-BERT-BFD model...") # # AutoModel.from_pretrained("Rostlab/prot_bert_bfd", low_cpu_mem_usage=True).save_pretrained(bio_model_dir) # # BIO-EMBEDDING MODEL LOADING # try: # print("Loading Model") # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # except Exception as e: # print(f"Error loading ProtTrans-BERT-BFD model: {e}") # embedder = None # def generate_bio_embeddings(sequence): # """Generate bio-embeddings for a given protein sequence.""" # if embedder is None: # return None # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility # except Exception as e: # print(f"Embedding Error: {e}") # return None # def generate_smiles(sequence, n_samples=100): # """Generate SMILES from a protein sequence.""" # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if protein_embedding is None: # return None, "Embedding generation failed!" 
# # TRAINED CVanilla_RNN_Builder MODEL LOADING # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # # MOLECULAR GRAPH GENERATION # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # # CONVERSION TO SMILES # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" # # SAVING TO FILE # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000) #MAIN # import os # import time # import requests # import numpy as np # from flask import Flask, render_template, request, send_file # from rdkit import Chem # from transformers import AutoModel # from bio_embeddings.embed import ProtTransBertBFDEmbedder # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list # # HUGGING FACE MODEL REPO (Replace with your actual Hugging Face username) # MODEL_BASE_URL = "https://huggingface.co/Bhanushray/protein-smiles-model/tree/main" # # 
REQUIRED MODEL FILES # MODEL_FILES = [ # "pytorch_model.bin", # "config.json", # "tokenizer_config.json", # "vocab.txt", # "special_tokens_map.json" # ] # # DIRECTORIES # bio_model_dir = os.getenv("BIO_MODEL_DIR", "modelsBioembed") # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") # # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings # # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder # os.makedirs(bio_model_dir, exist_ok=True) # os.makedirs(cvn_model_dir, exist_ok=True) # os.environ["TMPDIR"] = bio_model_dir # os.environ["TEMP"] = bio_model_dir # os.environ["TMP"] = bio_model_dir # UPLOAD_FOLDER = "Samples" # os.makedirs(UPLOAD_FOLDER, exist_ok=True) # app = Flask(__name__) # # DOWNLOAD MODEL FILES IF MISSING # for file_name in MODEL_FILES: # file_path = os.path.join(bio_model_dir, file_name) # if not os.path.exists(file_path): # print(f"Downloading {file_name} ...") # response = requests.get(MODEL_BASE_URL + file_name, stream=True) # with open(file_path, "wb") as f: # for chunk in response.iter_content(chunk_size=1024): # f.write(chunk) # print(f"{file_name} downloaded!") # # BIO-EMBEDDING MODEL LOADING # try: # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # except Exception as e: # print(f"Error loading ProtTrans-BERT-BFD model: {e}") # embedder = None # def generate_bio_embeddings(sequence): # """Generate bio-embeddings for a given protein sequence.""" # if embedder is None: # return None # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility # except Exception as e: # print(f"Embedding Error: {e}") # return None # def generate_smiles(sequence, n_samples=100): # """Generate SMILES from a protein sequence.""" # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if 
protein_embedding is None: # return None, "Embedding generation failed!" # # LOAD TRAINED CVanilla_RNN_Builder MODEL # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # # MOLECULAR GRAPH GENERATION # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # # CONVERT TO SMILES # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" # # SAVE TO FILE # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000, debug=True) # import os # import time # import numpy as np # from flask import Flask, render_template, request, send_file # from rdkit import Chem # from transformers import AutoModel # from bio_embeddings.embed import ProtTransBertBFDEmbedder # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list # # DIRECTORIES # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings # cvn_model_dir 
= os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder # os.makedirs(bio_model_dir, exist_ok=True) # os.makedirs(cvn_model_dir, exist_ok=True) # os.environ["TMPDIR"] = bio_model_dir # os.environ["TEMP"] = bio_model_dir # os.environ["TMP"] = bio_model_dir # UPLOAD_FOLDER = "Samples" # os.makedirs(UPLOAD_FOLDER, exist_ok=True) # app = Flask(__name__) # model_path = os.path.join(bio_model_dir, "pytorch_model.bin") # if not os.path.exists(model_path): # print("Downloading ProtTrans-BERT-BFD model...") # AutoModel.from_pretrained("Rostlab/prot_bert_bfd", low_cpu_mem_usage=True).save_pretrained(bio_model_dir) # # BIO-EMBEDDING MODEL LOADING # try: # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # except Exception as e: # print(f"Error loading ProtTrans-BERT-BFD model: {e}") # embedder = None # def generate_bio_embeddings(sequence): # """Generate bio-embeddings for a given protein sequence.""" # if embedder is None: # return None # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility # except Exception as e: # print(f"Embedding Error: {e}") # return None # def generate_smiles(sequence, n_samples=100): # """Generate SMILES from a protein sequence.""" # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if protein_embedding is None: # return None, "Embedding generation failed!" 
# # TRAINED CVanilla_RNN_Builder MODEL LOADING # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # # MOLECULAR GRAPH GENERATION # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # # CONVERSION TO SMILES # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" # # SAVING TO FILE # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000,debug=True) # import os # import time # import numpy as np # from flask import Flask, render_template, request, send_file # from rdkit import Chem # from transformers import AutoModel # from bio_embeddings.embed import ProtTransBertBFDEmbedder # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list # from huggingface_hub import hf_hub_download # Import for direct file download # # Define directories for different models # bio_model_dir = os.path.join(os.getcwd(), 
"modelsBioembed") # For bio-embeddings # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder # # Ensure directories exist # os.makedirs(bio_model_dir, exist_ok=True) # os.makedirs(cvn_model_dir, exist_ok=True) # UPLOAD_FOLDER = "Samples" # os.makedirs(UPLOAD_FOLDER, exist_ok=True) # app = Flask(__name__) # # Download only the required pytorch_model.bin file # model_filename = "pytorch_model.bin" # model_path = os.path.join(bio_model_dir, model_filename) # if not os.path.exists(model_path): # print("Downloading pytorch_model.bin from Hugging Face...") # hf_hub_download(repo_id="Rostlab/prot_bert_bfd", filename=model_filename, local_dir=bio_model_dir) # # Load bio-embedding model once # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # def generate_bio_embeddings(sequence): # """Generate bio-embeddings for a given protein sequence.""" # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return np.array(embedding_per_protein).reshape(1, -1) # except Exception as e: # print(f"Embedding Error: {e}") # return None # def generate_smiles(sequence, n_samples=100): # """Generate SMILES from a protein sequence.""" # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if protein_embedding is None: # return None, "Embedding generation failed!" # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" 
# filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000, debug=True) # import os # import time # import requests # import numpy as np # import gdown # NEW: For Google Drive downloads # from flask import Flask, render_template, request, send_file # from rdkit import Chem # from transformers import AutoModel # from bio_embeddings.embed import ProtTransBertBFDEmbedder # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list # # REPLACE WITH YOUR GOOGLE DRIVE FILE IDs # GDRIVE_FILE_IDS = { # "pytorch_model.bin": "11g7bAXYNxlPsnwC8_qsUIZITAjG85JXb", # Replace with actual ID # "config.json": "1ZfuhTnEuKAI1Z92m1QnDTOEQYNe9y24E", # "tokenizer_config.json": "1r4ncUsWBNQZVKp4zw97DLTf0AgRUiuFc", # "vocab.txt": "1G1UQIGMHvCC3OokCG1tl-cTxjIVqw04w", # "special_tokens_map.json": "1pINnV2P1eBmaC7X0A52UhjrmlJgzxqbl" # } # # LOCAL DIRECTORIES # bio_model_dir = os.path.join(os.getcwd(), "modelsBioembed") # For bio-embeddings # cvn_model_dir = os.path.join(os.getcwd(), "models_folder") # For CVanilla_RNN_Builder # os.makedirs(bio_model_dir, exist_ok=True) # 
os.makedirs(cvn_model_dir, exist_ok=True) # os.environ["TMPDIR"] = bio_model_dir # os.environ["TEMP"] = bio_model_dir # os.environ["TMP"] = bio_model_dir # UPLOAD_FOLDER = "Samples" # os.makedirs(UPLOAD_FOLDER, exist_ok=True) # app = Flask(__name__) # # DOWNLOAD MODEL FILES IF MISSING # for file_name, file_id in GDRIVE_FILE_IDS.items(): # file_path = os.path.join(bio_model_dir, file_name) # if not os.path.exists(file_path): # print(f"Downloading {file_name} from Google Drive...") # gdown.download(f"https://drive.google.com/uc?id={file_id}", file_path, quiet=False) # print(f"{file_name} downloaded!") # # BIO-EMBEDDING MODEL LOADING # try: # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # except Exception as e: # print(f"Error loading ProtTrans-BERT-BFD model: {e}") # embedder = None # def generate_bio_embeddings(sequence): # """Generate bio-embeddings for a given protein sequence.""" # if embedder is None: # return None # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return np.array(embedding_per_protein).reshape(1, -1) # Reshape for model compatibility # except Exception as e: # print(f"Embedding Error: {e}") # return None # def generate_smiles(sequence, n_samples=100): # """Generate SMILES from a protein sequence.""" # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if protein_embedding is None: # return None, "Embedding generation failed!" 
# # LOAD TRAINED CVanilla_RNN_Builder MODEL # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # # MOLECULAR GRAPH GENERATION # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # # CONVERT TO SMILES # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" # # SAVE TO FILE # filename = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(UPLOAD_FOLDER, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000, debug=True) # import os # import time # import gdown # import numpy as np # from flask import Flask, render_template, request, send_file # from rdkit import Chem # from bio_embeddings.embed import ProtTransBertBFDEmbedder # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list # # DIRECTORIES # bio_model_dir = "/app/modelsBioembed" # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") # upload_folder = "Samples" # # Create directories if they don't exist # 
os.makedirs(bio_model_dir, exist_ok=True) # os.makedirs(cvn_model_dir, exist_ok=True) # os.makedirs(upload_folder, exist_ok=True) # # Google Drive file IDs for the model files # MODEL_FILES = { # "pytorch_model.bin": "1Z9XWk-kP5yrBRdBF_mQPQsM8drqQXafJ", # "config.json": "1adE428T5ZWeosoLsBeX7sVnn6m4VvVgL", # "tokenizer_config.json": "1USvLAZ3dM4TzVSRLjINk2_W989k1HDQ0", # "vocab.txt": "1tsdesfbr61UyLShV0ojvsXOp6VJ9Exrt", # "special_tokens_map.json": "1ChCwdz0NH8ODasqscGwCS9mY7urhQte2", # } # # Function to download missing files from Google Drive # def download_model_files(): # for filename, file_id in MODEL_FILES.items(): # file_path = os.path.join(bio_model_dir, filename) # if not os.path.exists(file_path): # print(f"Downloading {filename} from Google Drive...") # gdown.download(f"https://drive.google.com/uc?id={file_id}", file_path, quiet=False) # # Download required model files # download_model_files() # print("All model files are ready!") # # Load the ProtTrans-BERT-BFD Model # try: # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # print("ProtTrans-BERT-BFD model loaded successfully!") # except Exception as e: # print(f"Error loading model: {e}") # embedder = None # # Function to generate protein embeddings # def generate_bio_embeddings(sequence): # if embedder is None: # return None # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return np.array(embedding_per_protein).reshape(1, -1) # except Exception as e: # print(f"Embedding Error: {e}") # return None # # Function to generate SMILES from a protein sequence # def generate_smiles(sequence, n_samples=100): # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if protein_embedding is None: # return None, "Embedding generation failed!" 
# # Load the trained CVanilla_RNN_Builder model # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # # Generate molecular graphs # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # # Convert to SMILES format # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" # # Save SMILES to a file # filename = os.path.join(upload_folder, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # # Initialize Flask App # app = Flask(__name__) # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(upload_folder, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000) # import os # import time # import requests # from flask import Flask, render_template, request, send_file # from rdkit import Chem # from bio_embeddings.embed import ProtTransBertBFDEmbedder # from modelstrc import CVanilla_RNN_Builder, get_mol_from_graph_list # # DIRECTORIES # bio_model_dir = "/app/modelsBioembed" # cvn_model_dir = os.getenv("CVN_MODEL_DIR", "models_folder") # upload_folder = "Samples" # # Create directories if 
they don't exist # os.makedirs(bio_model_dir, exist_ok=True) # os.makedirs(cvn_model_dir, exist_ok=True) # os.makedirs(upload_folder, exist_ok=True) # # Google Drive file IDs for the model files # MODEL_FILES = { # "pytorch_model.bin": "1Z9XWk-kP5yrBRdBF_mQPQsM8drqQXafJ", # "config.json": "1adE428T5ZWeosoLsBeX7sVnn6m4VvVgL", # "tokenizer_config.json": "1USvLAZ3dM4TzVSRLjINk2_W989k1HDQ0", # "vocab.txt": "1tsdesfbr61UyLShV0ojvsXOp6VJ9Exrt", # "special_tokens_map.json": "1ChCwdz0NH8ODasqscGwCS9mY7urhQte2", # } # # Function to download a file from Google Drive # def download_file_from_google_drive(file_id, destination): # URL = f"https://drive.google.com/uc?export=download&id={file_id}" # session = requests.Session() # response = session.get(URL, stream=True) # # Check if the request was successful # if response.status_code == 200: # with open(destination, "wb") as f: # for chunk in response.iter_content(chunk_size=128): # f.write(chunk) # print(f"Downloaded {destination}") # else: # print(f"Failed to download {destination}") # # Function to download missing files from Google Drive # def download_model_files(): # for filename, file_id in MODEL_FILES.items(): # file_path = os.path.join(bio_model_dir, filename) # if not os.path.exists(file_path): # print(f"Downloading {filename} from Google Drive...") # download_file_from_google_drive(file_id, file_path) # # Download required model files # download_model_files() # print("All model files are ready!") # # Load the ProtTrans-BERT-BFD Model # try: # embedder = ProtTransBertBFDEmbedder(model_directory=bio_model_dir) # print("ProtTrans-BERT-BFD model loaded successfully!") # except Exception as e: # print(f"Error loading model: {e}") # embedder = None # # Function to generate protein embeddings # def generate_bio_embeddings(sequence): # if embedder is None: # return None # try: # embedding_protein = embedder.embed(sequence) # embedding_per_protein = embedder.reduce_per_protein(embedding_protein) # return 
np.array(embedding_per_protein).reshape(1, -1) # except Exception as e: # print(f"Embedding Error: {e}") # return None # # Function to generate SMILES from a protein sequence # def generate_smiles(sequence, n_samples=100): # start_time = time.time() # protein_embedding = generate_bio_embeddings(sequence) # if protein_embedding is None: # return None, "Embedding generation failed!" # # Load the trained CVanilla_RNN_Builder model # model = CVanilla_RNN_Builder(cvn_model_dir, gpu_id=None) # # Generate molecular graphs # samples = model.sample(n_samples, c=protein_embedding[0], output_type='graph') # valid_samples = [sample for sample in samples if sample is not None] # # Convert to SMILES format # smiles_list = [ # Chem.MolToSmiles(mol) for mol in get_mol_from_graph_list(valid_samples, sanitize=True) if mol is not None # ] # if not smiles_list: # return None, "No valid SMILES generated!" # # Save SMILES to a file # filename = os.path.join(upload_folder, "SMILES_GENERATED.txt") # with open(filename, "w") as file: # file.write("\n".join(smiles_list)) # elapsed_time = time.time() - start_time # return filename, elapsed_time # # Initialize Flask App # app = Flask(__name__) # @app.route("/", methods=["GET", "POST"]) # def index(): # if request.method == "POST": # sequence = request.form["sequence"].strip() # if not sequence: # return render_template("index.html", message="Please enter a valid sequence.") # file_path, result = generate_smiles(sequence) # if file_path is None: # return render_template("index.html", message=f"Error: {result}") # return render_template("index.html", message="SMILES generated successfully!", file_path=file_path, time_taken=result) # return render_template("index.html") # @app.route("/download") # def download_file(): # file_path = os.path.join(upload_folder, "SMILES_GENERATED.txt") # return send_file(file_path, as_attachment=True) # if __name__ == "__main__": # app.run(host="0.0.0.0", port=8000)