Update app.py
Browse files
app.py
CHANGED
@@ -1,101 +1,126 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import pipeline
|
3 |
from rdkit import Chem
|
4 |
from rdkit.Chem import AllChem
|
5 |
from rdkit.Chem.Draw import rdMolDraw2D
|
6 |
import base64
|
7 |
import re
|
8 |
import py3Dmol
|
9 |
-
import
|
10 |
|
11 |
-
# Load
|
12 |
-
bio_gpt = pipeline("text-generation", model="microsoft/BioGPT-Large")
|
|
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
def drug_discovery(disease, symptoms):
|
15 |
-
#
|
16 |
prompt = (
|
17 |
-
f"
|
18 |
-
f"
|
19 |
-
f"
|
20 |
-
f"
|
21 |
-
f"SMILES: C1=CC=CC=C1 C(C(=O)O)N ..."
|
22 |
)
|
23 |
|
24 |
try:
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
return f"BioGPT error: {e}", "", "", ""
|
29 |
-
|
30 |
-
# Extract drug names and SMILES
|
31 |
-
drugs_match = re.search(r"Drugs:\s*(.+)", result)
|
32 |
-
smiles_match = re.search(r"SMILES:\s*(.+)", result)
|
33 |
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
36 |
|
37 |
-
|
38 |
-
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
|
42 |
|
43 |
-
|
44 |
-
for smiles in smiles_list:
|
45 |
mol = Chem.MolFromSmiles(smiles)
|
|
|
|
|
|
|
|
|
46 |
AllChem.Compute2DCoords(mol)
|
47 |
-
drawer = rdMolDraw2D.MolDraw2DCairo(
|
48 |
drawer.DrawMolecule(mol)
|
49 |
drawer.FinishDrawing()
|
50 |
img_data = drawer.GetDrawingText()
|
51 |
img_base64 = base64.b64encode(img_data).decode("utf-8")
|
52 |
-
img_html
|
53 |
-
<div style="display:inline-block; margin:10px;">
|
54 |
-
<img src="data:image/png;base64,{img_base64}" width="120" height="120">
|
55 |
-
<p style="color:white; font-size:12px;">{smiles}</p>
|
56 |
-
</div>'''
|
57 |
|
58 |
-
# 3D
|
59 |
mol3d = Chem.AddHs(mol)
|
60 |
-
AllChem.EmbedMolecule(mol3d
|
61 |
AllChem.UFFOptimizeMolecule(mol3d)
|
62 |
mb = Chem.MolToMolBlock(mol3d)
|
63 |
-
|
|
|
64 |
viewer.addModel(mb, "mol")
|
65 |
viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
|
66 |
viewer.setBackgroundColor("black")
|
67 |
viewer.zoomTo()
|
68 |
viewer_html_raw = viewer._make_html()
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
73 |
|
74 |
-
|
75 |
-
|
|
|
76 |
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
-
|
80 |
|
|
|
81 |
iface = gr.Interface(
|
82 |
fn=drug_discovery,
|
83 |
inputs=[
|
84 |
-
gr.Textbox(label="
|
85 |
-
gr.Textbox(label="
|
86 |
],
|
87 |
outputs=[
|
88 |
-
gr.Textbox(label="
|
89 |
-
gr.Textbox(label="π§ͺ SMILES
|
90 |
gr.HTML(label="πΌοΈ 2D Molecules"),
|
91 |
-
gr.HTML(label="π¬ 3D Molecules")
|
92 |
],
|
93 |
-
title="
|
94 |
-
description="BioGPT +
|
95 |
css="""
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
99 |
)
|
100 |
|
101 |
iface.launch(share=True)
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM, AutoModelForCausalLM
|
3 |
from rdkit import Chem
|
4 |
from rdkit.Chem import AllChem
|
5 |
from rdkit.Chem.Draw import rdMolDraw2D
|
6 |
import base64
|
7 |
import re
|
8 |
import py3Dmol
|
9 |
+
import random
|
10 |
|
11 |
+
# Load the models once at import time so every request reuses the same pipelines.
import torch  # imported here only to probe for a CUDA device

# A hard-coded device=0 requires a CUDA GPU; -1 is the pipeline's CPU setting,
# so the app can still start on a CPU-only host.
_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1

bio_gpt = pipeline("text-generation", model="microsoft/BioGPT-Large", device=_PIPELINE_DEVICE)
# NOTE(review): pubmed_bert is not called by any function visible in this file;
# it is kept because code outside this view may reference it — confirm before removing.
pubmed_bert = pipeline("fill-mask", model="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
|
14 |
|
15 |
+
# Helper function to clean and parse generated SMILES
|
16 |
+
def extract_valid_smiles(generated_text, count=3):
    """Return up to ``count`` tokens from ``generated_text`` that RDKit parses as SMILES.

    Candidate tokens are runs of at least five SMILES-alphabet characters that
    are not directly adjacent to another letter or digit. Each candidate is
    kept only if ``Chem.MolFromSmiles`` returns a molecule for it.

    Args:
        generated_text: Free text (e.g. model output) to scan. ``None`` or an
            empty string yields an empty list.
        count: Maximum number of SMILES strings to return. Values ``<= 0``
            yield an empty list.

    Returns:
        A list of at most ``count`` SMILES strings, in order of appearance.
    """
    # Guard first: without this, count <= 0 would still return one match because
    # the limit was only checked after the first append.
    if count <= 0 or not generated_text:
        return []

    candidates = re.findall(
        r"(?<![A-Za-z0-9])[A-Za-z0-9@+\-\[\]\(\)=#$]{5,}(?![A-Za-z0-9])",
        generated_text,
    )

    valid_smiles = []
    for candidate in candidates:
        # MolFromSmiles returns None for strings it cannot parse.
        if Chem.MolFromSmiles(candidate) is None:
            continue
        valid_smiles.append(candidate)
        if len(valid_smiles) >= count:
            break
    return valid_smiles
|
26 |
+
|
27 |
+
# Drug discovery function
|
28 |
def _render_2d_html(mol):
    """Draw ``mol`` as a 300x300 PNG and return it as an inline ``<img>`` tag."""
    AllChem.Compute2DCoords(mol)
    drawer = rdMolDraw2D.MolDraw2DCairo(300, 300)
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()
    png_b64 = base64.b64encode(drawer.GetDrawingText()).decode("utf-8")
    return f'''<img src="data:image/png;base64,{png_b64}" width="200" style="margin:10px; border:1px solid #ccc; border-radius:12px;">'''


def _render_3d_html(mol):
    """Return an ``<iframe>`` with a py3Dmol view of ``mol``, or ``""`` if embedding fails."""
    import html  # function-scope import: only needed to escape the srcdoc payload

    mol3d = Chem.AddHs(mol)
    # EmbedMolecule returns -1 when no 3D conformer could be generated; running
    # the force-field optimisation on such a molecule would raise.
    if AllChem.EmbedMolecule(mol3d) == -1:
        return ""
    AllChem.UFFOptimizeMolecule(mol3d)
    mol_block = Chem.MolToMolBlock(mol3d)

    viewer = py3Dmol.view(width=300, height=300)
    viewer.addModel(mol_block, "mol")
    viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
    viewer.setBackgroundColor("black")
    viewer.zoomTo()
    raw_html = viewer._make_html()

    # The viewer markup contains double quotes; it must be entity-escaped or it
    # terminates the srcdoc attribute early (the previous replace('"', '"') was a no-op).
    return (
        f'<iframe srcdoc="{html.escape(raw_html, quote=True)}" '
        'width="320" height="320" frameborder="0" '
        'style="border-radius: 12px; box-shadow: 0 6px 20px rgba(0,255,255,0.35);"></iframe>'
    )


def drug_discovery(disease, symptoms):
    """Generate a research summary and candidate molecules for a described disease.

    BioGPT is prompted twice: once for a free-text research summary and once
    for lines of the form ``<SMILES> - <DrugName>``. Up to three parsed
    molecules that RDKit accepts are rendered in 2D and 3D.

    Args:
        disease: Name of the disease, interpolated into both prompts.
        symptoms: Symptom description, interpolated into both prompts.

    Returns:
        A 4-tuple of strings: the generated summary (or an error notice), one
        ``name: SMILES`` line per molecule, the concatenated 2D ``<img>`` HTML,
        and the concatenated 3D ``<iframe>`` HTML. If no molecule could be
        parsed, a ``DemoDrug`` placeholder line is returned with empty HTML.
    """
    # Prompt to BioGPT for biomedical insights
    prompt = (
        f"Imagine a novel disease '{disease}' with symptoms: {symptoms}.\n"
        f"Write a short research summary covering:\n"
        f"- Hypothetical causes\n- Suggested diagnostic methods\n- Possible treatments\n"
        f"- Names of potential experimental drugs\n"
    )

    try:
        literature = bio_gpt(prompt, max_length=512, temperature=0.7)[0]["generated_text"]
    except Exception:  # best-effort: show a notice instead of failing the request
        literature = "⚠️ Error: Could not retrieve literature using BioGPT."

    # Prompt for SMILES + drug names using BioGPT
    molecule_prompt = (
        f"Give 3 unique experimental drug-like SMILES strings with hypothetical drug names for treating '{disease}' "
        f"with symptoms: {symptoms}. Format: <SMILES> - <DrugName>"
    )

    try:
        smiles_response = bio_gpt(molecule_prompt, max_length=100)[0]["generated_text"]
    except Exception:  # best-effort: fall back to a fixed demo entry
        smiles_response = "C1=CC=CC=C1 - DemoDrug"

    entries = re.findall(r"([A-Za-z0-9@+\-\[\]\(\)=#$]{5,})\s*-\s*(\w+)", smiles_response)

    results = []
    for smiles, name in entries[:3]:  # Limit to 3 molecules
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:  # not parseable as SMILES
            continue
        # A molecule whose 3D embedding fails still gets its 2D picture.
        results.append((f"{name}", smiles, _render_2d_html(mol), _render_3d_html(mol)))

    # If no valid molecule found
    if not results:
        results.append(("DemoDrug", "C1=CC=CC=C1", "", ""))

    # Combine outputs
    # NOTE(review): the "π" bullet prefix looks like a mis-encoded emoji; the
    # original glyph cannot be recovered from this file — confirm and restore.
    combined_molecules = "".join(f"π {drug_name}: {smiles}\n" for drug_name, smiles, _, _ in results)
    combined_imgs = "".join(img for _, _, img, _ in results)
    combined_3d = "".join(frame for _, _, _, frame in results)

    return literature.strip(), combined_molecules.strip(), combined_imgs, combined_3d
|
102 |
|
103 |
+
# Gradio UI
|
104 |
# Two text inputs feed drug_discovery; its four return values map, in order,
# onto the four output components below.
# NOTE(review): the emoji prefixes were mis-encoded in this file and have been
# restored from their byte patterns; the prefix of "Biomedical Insights" could
# not be recovered and is left as found — confirm against the intended glyph.
iface = gr.Interface(
    fn=drug_discovery,
    inputs=[
        gr.Textbox(label="🦠 Enter Novel Disease Name", value="Neurospike Fever"),
        gr.Textbox(label="🩺 Enter Symptoms", value="fever, neural tremors, fatigue"),
    ],
    outputs=[
        gr.Textbox(label="π Biomedical Insights"),
        gr.Textbox(label="🧪 Molecule Names + SMILES"),
        gr.HTML(label="🖼️ 2D Molecules"),
        gr.HTML(label="🔬 3D Molecules"),
    ],
    title="🧬 Experimental Drug Discovery for Unknown Diseases",
    description="Enter an unknown disease and symptoms. The AI (BioGPT + PubMedBERT) will generate biomedical research, name possible experimental drugs, and visualize 2D + 3D molecular structures.",
    css="""
    body { background-color: #0d1117; color: #ffffff; font-family: Segoe UI, sans-serif; }
    .gradio-container { padding: 20px; }
    iframe, img { margin: 8px; }
    """,
    allow_flagging="never",
)

iface.launch(share=True)
|