"""Gradio demo: hypothetical drug discovery for a user-described disease.

BioGPT generates a short research summary plus candidate ``SMILES - Name``
pairs; RDKit validates each SMILES string and draws it in 2D, and py3Dmol
renders an interactive 3D view. All generated content is speculative model
output, not medical or chemical advice.
"""

import base64
import html
import logging
import random
import re

import gradio as gr
import py3Dmol
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import rdMolDraw2D
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForMaskedLM,
    AutoModelForCausalLM,
)

logger = logging.getLogger(__name__)

# Characters accepted as part of a SMILES candidate in generated text.
_SMILES_TOKEN = r"[A-Za-z0-9@+\-\[\]\(\)=#$]{5,}"
_SMILES_RE = re.compile(_SMILES_TOKEN)
# "<smiles> - <name>" pairs, as requested from the model in the molecule prompt.
_SMILES_NAME_RE = re.compile(r"([A-Za-z0-9@+\-\[\]\(\)=#$]{5,})\s*-\s*(\w+)")

# Used whenever the model is unavailable or yields no parseable molecule.
_FALLBACK_SMILES = "C1=CC=CC=C1"
_FALLBACK_NAME = "DemoDrug"
_MAX_MOLECULES = 3


def _pick_device():
    """Return 0 (first CUDA GPU) when one is usable, otherwise -1 (CPU)."""
    try:
        import torch
    except ImportError:
        return -1
    return 0 if torch.cuda.is_available() else -1


# Load multiple models
bio_gpt = pipeline(
    "text-generation",
    model="microsoft/BioGPT-Large",
    device=_pick_device(),
)
# NOTE(review): pubmed_bert is loaded but never called in this file; it is
# kept so the module-level names stay the same for any external user.
pubmed_bert = pipeline(
    "fill-mask",
    model="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
)


# Helper function to clean and parse generated SMILES
def extract_valid_smiles(generated_text, count=3):
    """Return up to ``count`` substrings of the text that RDKit parses as SMILES.

    Args:
        generated_text: Free text (for example model output) to scan.
        count: Maximum number of valid SMILES strings to return.

    Returns:
        A list of SMILES strings in order of appearance; possibly empty.
    """
    # NOTE(review): the candidate pattern is a reconstruction based on the
    # character class used for the "SMILES - Name" pairs -- confirm it
    # matches the intended matching rules.
    valid_smiles = []
    for candidate in _SMILES_RE.findall(generated_text or ""):
        if len(valid_smiles) >= count:
            break
        if Chem.MolFromSmiles(candidate) is not None:
            valid_smiles.append(candidate)
    return valid_smiles


def _render_2d_html(mol):
    """Draw ``mol`` as a 300x300 PNG and return it as an inline ``<img>`` tag."""
    AllChem.Compute2DCoords(mol)
    drawer = rdMolDraw2D.MolDraw2DCairo(300, 300)
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()
    png_b64 = base64.b64encode(drawer.GetDrawingText()).decode("utf-8")
    return (
        f'<img src="data:image/png;base64,{png_b64}" '
        f'width="300" height="300" alt="2D structure"/>'
    )


def _render_3d_html(mol):
    """Return an ``<iframe>`` with a py3Dmol view of ``mol``, or "" on failure."""
    mol3d = Chem.AddHs(mol)
    # EmbedMolecule returns the new conformer id, or -1 when no 3D
    # coordinates could be generated; there is nothing to show in that case.
    if AllChem.EmbedMolecule(mol3d) < 0:
        logger.warning("3D embedding failed; skipping 3D view")
        return ""
    try:
        AllChem.UFFOptimizeMolecule(mol3d)
    except (ValueError, RuntimeError):
        # Optimisation is cosmetic: fall back to the raw embedded geometry.
        logger.warning("UFF optimisation failed; showing unoptimised geometry")

    viewer = py3Dmol.view(width=300, height=300)
    viewer.addModel(Chem.MolToMolBlock(mol3d), "mol")
    viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
    viewer.setBackgroundColor("black")
    viewer.zoomTo()
    # The viewer page is embedded through the srcdoc attribute, so it must be
    # HTML-escaped (quotes included) to keep the attribute well-formed.
    srcdoc = html.escape(viewer._make_html(), quote=True)
    return (
        f'<iframe srcdoc="{srcdoc}" width="320" height="320" '
        f'frameborder="0"></iframe>'
    )


# Drug discovery function
def drug_discovery(disease, symptoms):
    """Generate a research summary and candidate molecules for a disease.

    Args:
        disease: Name of the (possibly fictional) disease.
        symptoms: Free-text description of its symptoms.

    Returns:
        A 4-tuple ``(literature, molecules, images_html, viewers_html)``:
        the generated summary text, one "name: SMILES" line per molecule,
        the concatenated 2D ``<img>`` tags, and the concatenated 3D
        ``<iframe>`` tags. When generation fails or no valid molecule is
        found, a benzene "DemoDrug" entry is returned instead.
    """
    # Prompt to BioGPT for biomedical insights
    prompt = (
        f"Imagine a novel disease '{disease}' with symptoms: {symptoms}.\n"
        f"Write a short research summary covering:\n"
        f"- Hypothetical causes\n- Suggested diagnostic methods\n- Possible treatments\n"
        f"- Names of potential experimental drugs\n"
    )
    try:
        # temperature only takes effect when sampling is enabled.
        literature = bio_gpt(
            prompt, max_length=512, do_sample=True, temperature=0.7
        )[0]["generated_text"]
    except Exception:
        logger.exception("BioGPT literature generation failed")
        literature = "⚠️ Error: Could not retrieve literature using BioGPT."

    # Prompt for SMILES + Drug Names using BioGPT
    molecule_prompt = (
        f"Give 3 unique experimental drug-like SMILES strings with hypothetical drug names for treating '{disease}' "
        f"with symptoms: {symptoms}. "
        f"Format: <SMILES> - <DrugName>"
    )
    try:
        smiles_response = bio_gpt(molecule_prompt, max_length=100)[0]["generated_text"]
    except Exception:
        logger.exception("BioGPT molecule generation failed")
        smiles_response = f"{_FALLBACK_SMILES} - {_FALLBACK_NAME}"

    results = []
    # Limit to 3 candidate pairs; candidates RDKit cannot parse are dropped.
    for smiles, name in _SMILES_NAME_RE.findall(smiles_response)[:_MAX_MOLECULES]:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            continue
        results.append((name, smiles, _render_2d_html(mol), _render_3d_html(mol)))

    # If no valid molecule found, show the demo molecule instead.
    if not results:
        demo_mol = Chem.MolFromSmiles(_FALLBACK_SMILES)
        results.append(
            (
                _FALLBACK_NAME,
                _FALLBACK_SMILES,
                _render_2d_html(demo_mol),
                _render_3d_html(demo_mol),
            )
        )

    # Combine outputs
    combined_molecules = "\n".join(
        f"💊 {drug_name}: {smiles}" for drug_name, smiles, _, _ in results
    )
    combined_imgs = "".join(img for _, _, img, _ in results)
    combined_3d = "".join(viewer for _, _, _, viewer in results)

    return literature.strip(), combined_molecules, combined_imgs, combined_3d


# Gradio UI
iface = gr.Interface(
    fn=drug_discovery,
    inputs=[
        gr.Textbox(label="🦠 Enter Novel Disease Name", value="Neurospike Fever"),
        gr.Textbox(label="🩺 Enter Symptoms", value="fever, neural tremors, fatigue"),
    ],
    outputs=[
        gr.Textbox(label="📖 Biomedical Insights"),
        gr.Textbox(label="🧪 Molecule Names + SMILES"),
        gr.HTML(label="🖼️ 2D Molecules"),
        gr.HTML(label="🔬 3D Molecules"),
    ],
    title="🧬 Experimental Drug Discovery for Unknown Diseases",
    description=(
        "Enter an unknown disease and symptoms. "
        "The AI (BioGPT + PubMedBERT) will generate biomedical research, name possible "
        "experimental drugs, and visualize 2D + 3D molecular structures."
    ),
    css="""
    body { background-color: #0d1117; color: #ffffff; font-family: Segoe UI, sans-serif; }
    .gradio-container { padding: 20px; }
    iframe, img { margin: 8px; }
    """,
    allow_flagging="never",
)

iface.launch(share=True)