anaghanagesh committed on
Commit
790ea27
·
verified ·
1 Parent(s): ac09baf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -53
app.py CHANGED
@@ -1,101 +1,126 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  from rdkit import Chem
4
  from rdkit.Chem import AllChem
5
  from rdkit.Chem.Draw import rdMolDraw2D
6
  import base64
7
  import re
8
  import py3Dmol
9
- import time
10
 
11
- # Load model once
12
- bio_gpt = pipeline("text-generation", model="microsoft/BioGPT-Large")
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def drug_discovery(disease, symptoms):
15
- # Simplified and efficient medical prompt
16
  prompt = (
17
- f"You're a biomedical AI. A new disease shows symptoms: '{symptoms}'. "
18
- f"Suggest 5 generic drug names and 5 SMILES strings that could help treat this. "
19
- f"List drug names first, then SMILES strings in separate lines like:\n"
20
- f"Drugs: Aspirin, Ibuprofen, Paracetamol, ...\n"
21
- f"SMILES: C1=CC=CC=C1 C(C(=O)O)N ..."
22
  )
23
 
24
  try:
25
- start = time.time()
26
- result = bio_gpt(prompt, max_length=150, do_sample=True, temperature=0.6)[0]['generated_text']
27
- except Exception as e:
28
- return f"BioGPT error: {e}", "", "", ""
29
-
30
- # Extract drug names and SMILES
31
- drugs_match = re.search(r"Drugs:\s*(.+)", result)
32
- smiles_match = re.search(r"SMILES:\s*(.+)", result)
33
 
34
- drug_names = drugs_match.group(1).strip() if drugs_match else "Unknown"
35
- raw_smiles = smiles_match.group(1).strip() if smiles_match else "C1=CC=CC=C1"
 
 
 
36
 
37
- smiles_list = re.findall(r"(?<![A-Za-z0-9])[A-Za-z0-9@+\-\[\]\(\)=#$]{5,}(?![A-Za-z0-9])", raw_smiles)
38
- smiles_list = list({sm for sm in smiles_list if Chem.MolFromSmiles(sm)})[:3]
 
 
39
 
40
- if not smiles_list:
41
- smiles_list = ["C1=CC=CC=C1"]
42
 
43
- img_html, viewer_htmls = "", ""
44
- for smiles in smiles_list:
45
  mol = Chem.MolFromSmiles(smiles)
 
 
 
 
46
  AllChem.Compute2DCoords(mol)
47
- drawer = rdMolDraw2D.MolDraw2DCairo(250, 250)
48
  drawer.DrawMolecule(mol)
49
  drawer.FinishDrawing()
50
  img_data = drawer.GetDrawingText()
51
  img_base64 = base64.b64encode(img_data).decode("utf-8")
52
- img_html += f'''
53
- <div style="display:inline-block; margin:10px;">
54
- <img src="data:image/png;base64,{img_base64}" width="120" height="120">
55
- <p style="color:white; font-size:12px;">{smiles}</p>
56
- </div>'''
57
 
58
- # 3D View
59
  mol3d = Chem.AddHs(mol)
60
- AllChem.EmbedMolecule(mol3d, randomSeed=42)
61
  AllChem.UFFOptimizeMolecule(mol3d)
62
  mb = Chem.MolToMolBlock(mol3d)
63
- viewer = py3Dmol.view(width=240, height=240)
 
64
  viewer.addModel(mb, "mol")
65
  viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
66
  viewer.setBackgroundColor("black")
67
  viewer.zoomTo()
68
  viewer_html_raw = viewer._make_html()
69
- viewer_htmls += f'''
70
- <div style="display:inline-block; margin:10px;">
71
- <iframe srcdoc="{viewer_html_raw.replace('"', '&quot;')}" width="240" height="240" frameborder="0"></iframe>
72
- </div>'''
 
73
 
74
- duration = round(time.time() - start, 2)
75
- literature_summary = f"πŸ“‹ Drug candidates (auto-generated in {duration}s):\n{drug_names}"
 
76
 
77
- return literature_summary, ", ".join(smiles_list), img_html, viewer_htmls
 
 
 
 
 
 
 
78
 
79
- # Gradio UI setup
80
 
 
81
  iface = gr.Interface(
82
  fn=drug_discovery,
83
  inputs=[
84
- gr.Textbox(label="🧬 Enter Unknown Disease or Name", value="X-disease"),
85
- gr.Textbox(label="πŸ“ Symptoms", value="fever, joint pain")
86
  ],
87
  outputs=[
88
- gr.Textbox(label="πŸ”– AI Literature Summary"),
89
- gr.Textbox(label="πŸ§ͺ SMILES List"),
90
  gr.HTML(label="πŸ–ΌοΈ 2D Molecules"),
91
- gr.HTML(label="πŸ”¬ 3D Molecules")
92
  ],
93
- title="πŸ§ͺ Drug Discovery for Unknown Diseases",
94
- description="BioGPT + RDKit-powered system to suggest potential drug molecules for unknown or rare diseases.",
95
  css="""
96
- body { background-color: #111; color: #eee; }
97
- .gradio-container { animation: fadeIn 1.5s ease-in-out; }
98
- """
 
 
99
  )
100
 
101
  iface.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM, AutoModelForCausalLM
3
  from rdkit import Chem
4
  from rdkit.Chem import AllChem
5
  from rdkit.Chem.Draw import rdMolDraw2D
6
  import base64
7
  import re
8
  import py3Dmol
9
+ import random
10
 
11
# Load the models once at import time so every request reuses them.
# torch is imported here because this block is its only user; the PyTorch
# checkpoints loaded below already require it to be installed.
import torch

# device=0 selects the first CUDA GPU. Hard-coding it makes pipeline() fail
# on CPU-only hosts, so fall back to the CPU (-1) when no GPU is available.
_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1

bio_gpt = pipeline(
    "text-generation",
    model="microsoft/BioGPT-Large",
    device=_PIPELINE_DEVICE,
)
# NOTE(review): pubmed_bert is not referenced by any other code visible in
# this file -- confirm it is needed before paying its load time and memory.
pubmed_bert = pipeline(
    "fill-mask",
    model="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
)
14
 
15
+ # Helper function to clean and parse generated SMILES
16
def extract_valid_smiles(generated_text, count=3):
    """Return up to ``count`` distinct SMILES strings found in ``generated_text``.

    Candidate tokens are runs of at least five SMILES-alphabet characters
    that are not glued to a neighbouring letter or digit. A candidate is kept
    only when ``Chem.MolFromSmiles`` can parse it.

    Args:
        generated_text: Free text (for example model output) to scan. ``None``
            or an empty string yields an empty list.
        count: Maximum number of SMILES strings to return. Values ``<= 0``
            yield an empty list.

    Returns:
        A list of parseable SMILES strings in order of first appearance,
        without duplicates, containing at most ``count`` items.
    """
    # Guard first: previously count <= 0 still returned one match because the
    # limit was only checked after appending.
    if count <= 0 or not generated_text:
        return []

    candidates = re.findall(
        r"(?<![A-Za-z0-9])[A-Za-z0-9@+\-\[\]\(\)=#$]{5,}(?![A-Za-z0-9])",
        generated_text,
    )

    valid_smiles = []
    seen = set()
    for candidate in candidates:
        # Skip repeats so the caller gets distinct molecules and RDKit is not
        # asked to re-parse the same token.
        if candidate in seen:
            continue
        seen.add(candidate)
        # MolFromSmiles returns None for strings it cannot parse.
        if Chem.MolFromSmiles(candidate) is None:
            continue
        valid_smiles.append(candidate)
        if len(valid_smiles) >= count:
            break
    return valid_smiles
26
+
27
+ # Drug discovery function
28
  def drug_discovery(disease, symptoms):
29
+ # Prompt to BioGPT for biomedical insights
30
  prompt = (
31
+ f"Imagine a novel disease '{disease}' with symptoms: {symptoms}.\n"
32
+ f"Write a short research summary covering:\n"
33
+ f"- Hypothetical causes\n- Suggested diagnostic methods\n- Possible treatments\n"
34
+ f"- Names of potential experimental drugs\n"
 
35
  )
36
 
37
  try:
38
+ literature = bio_gpt(prompt, max_length=512, temperature=0.7)[0]["generated_text"]
39
+ except:
40
+ literature = "⚠️ Error: Could not retrieve literature using BioGPT."
 
 
 
 
 
41
 
42
+ # Prompt for SMILES + Drug Names using BioGPT
43
+ molecule_prompt = (
44
+ f"Give 3 unique experimental drug-like SMILES strings with hypothetical drug names for treating '{disease}' "
45
+ f"with symptoms: {symptoms}. Format: <SMILES> - <DrugName>"
46
+ )
47
 
48
+ try:
49
+ smiles_response = bio_gpt(molecule_prompt, max_length=100)[0]["generated_text"]
50
+ except:
51
+ smiles_response = "C1=CC=CC=C1 - DemoDrug"
52
 
53
+ entries = re.findall(r"([A-Za-z0-9@+\-\[\]\(\)=#$]{5,})\s*-\s*(\w+)", smiles_response)
54
+ results = []
55
 
56
+ for smiles, name in entries[:3]: # Limit to 3 molecules
 
57
  mol = Chem.MolFromSmiles(smiles)
58
+ if not mol:
59
+ continue
60
+
61
+ # 2D drawing
62
  AllChem.Compute2DCoords(mol)
63
+ drawer = rdMolDraw2D.MolDraw2DCairo(300, 300)
64
  drawer.DrawMolecule(mol)
65
  drawer.FinishDrawing()
66
  img_data = drawer.GetDrawingText()
67
  img_base64 = base64.b64encode(img_data).decode("utf-8")
68
+ img_html = f'''<img src="data:image/png;base64,{img_base64}" width="200" style="margin:10px; border:1px solid #ccc; border-radius:12px;">'''
 
 
 
 
69
 
70
+ # 3D molecule view
71
  mol3d = Chem.AddHs(mol)
72
+ AllChem.EmbedMolecule(mol3d)
73
  AllChem.UFFOptimizeMolecule(mol3d)
74
  mb = Chem.MolToMolBlock(mol3d)
75
+
76
+ viewer = py3Dmol.view(width=300, height=300)
77
  viewer.addModel(mb, "mol")
78
  viewer.setStyle({"stick": {"colorscheme": "cyanCarbon"}})
79
  viewer.setBackgroundColor("black")
80
  viewer.zoomTo()
81
  viewer_html_raw = viewer._make_html()
82
+ viewer_html = f'''<iframe srcdoc="{viewer_html_raw.replace('"', '&quot;')}"
83
+ width="320" height="320" frameborder="0"
84
+ style="border-radius: 12px; box-shadow: 0 6px 20px rgba(0,255,255,0.35);"></iframe>'''
85
+
86
+ results.append((f"{name}", smiles, img_html, viewer_html))
87
 
88
+ # If no valid molecule found
89
+ if not results:
90
+ results.append(("DemoDrug", "C1=CC=CC=C1", "", ""))
91
 
92
+ # Combine outputs
93
+ combined_molecules = ""
94
+ combined_imgs = ""
95
+ combined_3d = ""
96
+ for drug_name, smiles, img, viewer in results:
97
+ combined_molecules += f"πŸ’Š {drug_name}: {smiles}\n"
98
+ combined_imgs += img
99
+ combined_3d += viewer
100
 
101
+ return literature.strip(), combined_molecules.strip(), combined_imgs, combined_3d
102
 
103
+ # Gradio UI
104
  iface = gr.Interface(
105
  fn=drug_discovery,
106
  inputs=[
107
+ gr.Textbox(label="🦠 Enter Novel Disease Name", value="Neurospike Fever"),
108
+ gr.Textbox(label="🩺 Enter Symptoms", value="fever, neural tremors, fatigue"),
109
  ],
110
  outputs=[
111
+ gr.Textbox(label="πŸ“– Biomedical Insights"),
112
+ gr.Textbox(label="πŸ§ͺ Molecule Names + SMILES"),
113
  gr.HTML(label="πŸ–ΌοΈ 2D Molecules"),
114
+ gr.HTML(label="πŸ”¬ 3D Molecules"),
115
  ],
116
+ title="🧬 Experimental Drug Discovery for Unknown Diseases",
117
+ description="Enter an unknown disease and symptoms. The AI (BioGPT + PubMedBERT) will generate biomedical research, name possible experimental drugs, and visualize 2D + 3D molecular structures.",
118
  css="""
119
+ body { background-color: #0d1117; color: #ffffff; font-family: Segoe UI, sans-serif; }
120
+ .gradio-container { padding: 20px; }
121
+ iframe, img { margin: 8px; }
122
+ """,
123
+ allow_flagging="never"
124
  )
125
 
126
  iface.launch(share=True)