File size: 4,870 Bytes
7c00697
790ea27
7c00697
 
 
 
 
 
790ea27
7c00697
790ea27
 
 
7c00697
790ea27
 
 
 
 
 
 
 
 
 
 
 
 
7c00697
790ea27
7c00697
790ea27
 
 
 
7c00697
 
 
790ea27
 
 
ac09baf
790ea27
 
 
 
 
ac09baf
790ea27
 
 
 
ac09baf
790ea27
 
ac09baf
790ea27
ac09baf
790ea27
 
 
 
ac09baf
790ea27
ac09baf
 
 
 
790ea27
ac09baf
790ea27
ac09baf
790ea27
ac09baf
 
790ea27
 
ac09baf
 
 
 
 
790ea27
 
 
 
 
ac09baf
790ea27
 
 
ac09baf
790ea27
 
 
 
 
 
 
 
ac09baf
790ea27
7c00697
790ea27
7c00697
 
ac09baf
790ea27
 
ac09baf
 
790ea27
 
ac09baf
790ea27
ac09baf
790ea27
 
ac09baf
790ea27
 
 
 
 
7c00697
 
ac09baf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import base64
import html
import random
import re

import gradio as gr
import py3Dmol
import torch
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import rdMolDraw2D
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM, AutoModelForCausalLM

# Load multiple models
# Pick the first CUDA GPU when one is visible and fall back to CPU (-1)
# otherwise, so the app does not request a GPU that is not there.
_bio_gpt_device = 0 if torch.cuda.is_available() else -1
bio_gpt = pipeline("text-generation", model="microsoft/BioGPT-Large", device=_bio_gpt_device)
# NOTE(review): pubmed_bert is not referenced by any code visible in this
# file; it is kept because other modules may import it -- confirm before
# removing, since loading it costs startup time and memory.
pubmed_bert = pipeline("fill-mask", model="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")

# Helper function to clean and parse generated SMILES
def extract_valid_smiles(generated_text, count=3):
    """Return up to *count* SMILES-like tokens that RDKit can parse.

    Candidate tokens are runs of at least five characters drawn from
    letters, digits and ``@ + - [ ] ( ) = # $`` that are not directly
    adjacent to another letter or digit. Each candidate is kept only if
    ``Chem.MolFromSmiles`` returns a molecule for it.

    Args:
        generated_text: Free text to scan (e.g. model output).
        count: Maximum number of SMILES strings to return.

    Returns:
        A list of at most *count* strings, in order of appearance. Empty
        when *count* is not positive, when the text is empty/``None``, or
        when no candidate parses.
    """
    # Honour the limit up front: checking it only after an append would
    # return one match even when the caller asked for zero.
    if count <= 0 or not generated_text:
        return []

    candidates = re.findall(
        r"(?<![A-Za-z0-9])[A-Za-z0-9@+\-\[\]\(\)=#$]{5,}(?![A-Za-z0-9])",
        generated_text,
    )

    valid_smiles = []
    for candidate in candidates:
        # MolFromSmiles returns None for strings it cannot parse.
        if Chem.MolFromSmiles(candidate) is None:
            continue
        valid_smiles.append(candidate)
        if len(valid_smiles) >= count:
            break
    return valid_smiles

# Drug discovery function
def _render_2d_html(mol):
    """Return an ``<img>`` tag embedding a 300x300 PNG drawing of *mol*."""
    AllChem.Compute2DCoords(mol)
    drawer = rdMolDraw2D.MolDraw2DCairo(300, 300)
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()
    png_b64 = base64.b64encode(drawer.GetDrawingText()).decode("utf-8")
    return f'''<img src="data:image/png;base64,{png_b64}" width="200" style="margin:10px; border:1px solid #ccc; border-radius:12px;">'''


def _render_3d_html(mol):
    """Return an ``<iframe>`` holding a py3Dmol view of *mol*, or "" if no 3D conformer can be embedded."""
    mol3d = Chem.AddHs(mol)
    # EmbedMolecule reports failure through a negative conformer id; running
    # the force-field optimisation without a conformer would raise and abort
    # the whole request, so skip the 3D view for this molecule instead.
    if AllChem.EmbedMolecule(mol3d) < 0:
        return ""
    AllChem.UFFOptimizeMolecule(mol3d)

    viewer = py3Dmol.view(width=300, height=300)
    viewer.addModel(Chem.MolToMolBlock(mol3d), "mol")
    viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
    viewer.setBackgroundColor("black")
    viewer.zoomTo()

    # NOTE(review): _make_html is a private py3Dmol method; confirm it is
    # still available when upgrading py3Dmol.
    # The document is placed inside an HTML attribute, so '&' must be escaped
    # as well as quotes; html.escape handles both.
    srcdoc = html.escape(viewer._make_html(), quote=True)
    return (
        f'<iframe srcdoc="{srcdoc}" '
        'width="320" height="320" frameborder="0" '
        'style="border-radius: 12px; box-shadow: 0 6px 20px rgba(0,255,255,0.35);"></iframe>'
    )


def drug_discovery(disease, symptoms):
    """Generate a research summary and candidate molecules for a disease.

    BioGPT is prompted twice: once for a free-text research summary and once
    for lines of the form ``<SMILES> - <DrugName>``. Each pair whose SMILES
    RDKit can parse is rendered as a 2D PNG and, when a conformer can be
    embedded, as an interactive 3D view.

    Args:
        disease: Name of the disease, interpolated into both prompts.
        symptoms: Symptom description, interpolated into both prompts.

    Returns:
        A 4-tuple of strings: the generated summary, one
        ``"<name>: <smiles>"`` line per molecule, the concatenated 2D
        ``<img>`` HTML, and the concatenated 3D ``<iframe>`` HTML. If no
        generated molecule is valid, a single ``DemoDrug`` entry with empty
        HTML is returned instead.
    """
    max_molecules = 3

    # Prompt to BioGPT for biomedical insights
    prompt = (
        f"Imagine a novel disease '{disease}' with symptoms: {symptoms}.\n"
        f"Write a short research summary covering:\n"
        f"- Hypothetical causes\n- Suggested diagnostic methods\n- Possible treatments\n"
        f"- Names of potential experimental drugs\n"
    )

    # Generation is best-effort: any model failure degrades to a placeholder.
    # `except Exception` (rather than a bare except) lets KeyboardInterrupt
    # and SystemExit propagate.
    try:
        literature = bio_gpt(prompt, max_length=512, temperature=0.7)[0]["generated_text"]
    except Exception:
        literature = "⚠️ Error: Could not retrieve literature using BioGPT."

    # Prompt for SMILES + Drug Names using BioGPT
    molecule_prompt = (
        f"Give 3 unique experimental drug-like SMILES strings with hypothetical drug names for treating '{disease}' "
        f"with symptoms: {symptoms}. Format: <SMILES> - <DrugName>"
    )

    try:
        smiles_response = bio_gpt(molecule_prompt, max_length=100)[0]["generated_text"]
    except Exception:
        smiles_response = "C1=CC=CC=C1 - DemoDrug"

    entries = re.findall(r"([A-Za-z0-9@+\-\[\]\(\)=#$]{5,})\s*-\s*(\w+)", smiles_response)

    results = []
    for smiles, name in entries:
        # Stop once enough *valid* molecules are collected; slicing the
        # candidates first would discard valid entries that follow
        # unparseable ones.
        if len(results) >= max_molecules:
            break
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            continue
        # 2D is rendered before 3D, matching the order in which the
        # molecule's coordinates are computed.
        img_html = _render_2d_html(mol)
        viewer_html = _render_3d_html(mol)
        results.append((name, smiles, img_html, viewer_html))

    # If no valid molecule found
    if not results:
        results.append(("DemoDrug", "C1=CC=CC=C1", "", ""))

    # Combine outputs
    combined_molecules = "\n".join(
        f"💊 {drug_name}: {smiles}" for drug_name, smiles, _, _ in results
    )
    combined_imgs = "".join(img for _, _, img, _ in results)
    combined_3d = "".join(viewer for _, _, _, viewer in results)

    return literature.strip(), combined_molecules.strip(), combined_imgs, combined_3d

# Gradio UI
# Two text inputs feed drug_discovery; its four return values map, in order,
# onto the four output components below.
iface = gr.Interface(
    fn=drug_discovery,
    inputs=[
        gr.Textbox(label="🦠 Enter Novel Disease Name", value="Neurospike Fever"),
        gr.Textbox(label="🩺 Enter Symptoms", value="fever, neural tremors, fatigue"),
    ],
    outputs=[
        gr.Textbox(label="📖 Biomedical Insights"),
        gr.Textbox(label="🧪 Molecule Names + SMILES"),
        gr.HTML(label="🖼️ 2D Molecules"),
        gr.HTML(label="🔬 3D Molecules"),
    ],
    title="🧬 Experimental Drug Discovery for Unknown Diseases",
    description="Enter an unknown disease and symptoms. The AI (BioGPT + PubMedBERT) will generate biomedical research, name possible experimental drugs, and visualize 2D + 3D molecular structures.",
    css="""
    body { background-color: #0d1117; color: #ffffff; font-family: Segoe UI, sans-serif; }
    .gradio-container { padding: 20px; }
    iframe, img { margin: 8px; }
    """,
    allow_flagging="never"
)

# Starts the web server when the module is executed; share=True asks Gradio
# for a public share link.
iface.launch(share=True)