import os
import json
import io
from typing import Dict, List

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI

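# Local helpers: provider-specific LLM calls and response validation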
import llm_calls
from llm_calls import validate_llm_response

# Load environment variables
load_dotenv()

CONDITION_NAME = "Retinitis Pigmentosa (RP)"

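# System prompt sent with every request; it constrains the model to return a
# strict JSON object with "dataset", "explanation", and "references" keys.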
SYSTEM_PROMPT = f"""
You are a medical assistant specialized in modifying structured medical data.
You will receive JSON input representing a dataset of medications for {CONDITION_NAME}.

Your task is to:
- Answer user requests about the provided medication data
- Add new columns or rows if requested, or modify existing ones
- Provide references, explanations, and additional remarks
Always return only a JSON object with:
- "dataset": the updated dataset
- "explanation": explanation of the changes and additional information related to the findings.
Specify the change made for each medication.
- "references": references for the findings, e.g. links to scientific papers or websites.
Specify which reference relates to which finding for each medication.

Additional guidelines:
1. Please respond in valid JSON format only.
2. Make sure the JSON is valid, e.g. has no unterminated strings or missing commas.
3. Ensure the response starts with `{{` and ends with `}}` without any trailing text.
"""


def update_dataframe(records: List[Dict] | pd.DataFrame):
    """Update the DataFrame with new records. """
    print(f"UPDATING DATAFRAME: {records}")
    if isinstance(records, pd.DataFrame):
        new_data = records
    else:
        new_data = pd.DataFrame(records)

    st.session_state.df = new_data  # Assign the updated DataFrame
    # st.rerun()  # Trigger a rerun


# Page config
st.set_page_config(layout="wide", page_title="RP Medication Analyzer")
col1, col2 = st.columns([2, 18])
col1.image("rp_logo.jpg", use_container_width=True)
col2.title("Analyze RP Related Medications")

# Sidebar for API Key settings
with st.sidebar:
    st.subheader("Select AI service")
    llm_provider = st.radio("API", options=["Perplexity.ai", "OpenAI"], index=0)

    api_key = None  # Initialize API key

    if llm_provider == "OpenAI":
        st.subheader("OpenAI API key")
        api_base_input = st.text_input(
            "Enter API base URL (leave empty to use the env variable)",
            value=os.environ.get("OPENAI_API_BASE", ""),
        )
        api_key_input = st.text_input(
            "Enter API Key",
            type="password",
            value=os.environ.get("OPENAI_API_KEY", ""),
        )

        openai_api_base = api_base_input if api_base_input else os.environ.get("OPENAI_API_BASE")
        api_key = api_key_input if api_key_input else os.environ.get("OPENAI_API_KEY")

        # Validate API key presence
        if not api_key:
            st.error("🚨 OpenAI API key is required!")

        # The v1 OpenAI client takes the base URL at construction time; setting
        # .api_base on the instance afterwards has no effect.
        openai_client = OpenAI(api_key=api_key, base_url=openai_api_base or None)

    elif llm_provider == "Perplexity.ai":
        st.subheader("Perplexity.ai API key")
        api_key_input = st.text_input(
            "Enter API Key",
            type="password",
            value=os.environ.get("PERPLEXITY_API_KEY", ""),
        )
        api_key = api_key_input if api_key_input else os.environ.get("PERPLEXITY_API_KEY")

        # Validate API key presence
        if not api_key:
            st.error("🚨 Perplexity.ai API key is required!")


# Ensure session persistence
if "df" not in st.session_state:
    st.session_state.df = None
if "uploaded_file" not in st.session_state:
    st.session_state.uploaded_file = None
if "explanation" not in st.session_state:
    st.session_state.explanation = "No modifications yet."
if "references" not in st.session_state:
    st.session_state.references = "No additional references."
if "last_prompt" not in st.session_state:
    st.session_state.last_prompt = ""
if "last_response" not in st.session_state:
    st.session_state.last_response = {}
if "history" not in st.session_state:
    st.session_state.history = []  # Stores all past interactions

# File uploader
file = st.file_uploader("Upload an Excel file", type=["xlsx"])

print(f"FILE: {file}")
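# Re-read only when a new file arrives; the optional "Metadata" sheet restores
# past prompt history and the "Data" sheet holds the medication dataset.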
if file and file != st.session_state.uploaded_file:
    try:
        with pd.ExcelFile(file) as xls:
            if "Metadata" in xls.sheet_names:
                st.session_state.history = pd.read_excel(xls, sheet_name="Metadata").to_dict(orient="records")
            if "Data" in xls.sheet_names:
                data_df = pd.read_excel(xls, sheet_name="Data")
                update_dataframe(data_df)
            else:
                st.error("🚨 No 'Data' sheet found in the uploaded file. Make sure the workbook contains one.")

        st.session_state.uploaded_file = file
        print("File uploaded successfully!")
        st.success("βœ… File uploaded successfully!")
    except Exception as e:
        print(f"Error reading file: {e}")
        st.error(f"🚨 Error reading file: {e}")


if st.session_state.df is not None:
    st.write("### Updated Dataset")
    st.dataframe(st.session_state.df, use_container_width=True)
else:
    st.warning("⚠️ Upload a file to proceed.")

# Explanation & remarks
if st.session_state.explanation:
    with st.expander("Explanation and remarks"):
        st.info(st.session_state.explanation)
if st.session_state.references:
    with st.expander("References"):
        st.warning(st.session_state.references)
if st.session_state.last_prompt:
    with st.expander("πŸ“œ Sent Prompt"):
        st.code(st.session_state.last_prompt, language="plaintext")

# if st.session_state.last_response:
#     with st.expander("🧠 LLM Response (Raw)"):
#         st.json(st.session_state.last_response)

# User query input
input_text = st.chat_input("Type your prompt here")

# 🚨 Validate: Ensure both API key and dataset are present before making an API call
if input_text:
    if not api_key:
        st.error("🚨 API key is missing! Please provide a valid key before proceeding.")
    elif st.session_state.df is None:
        st.error("🚨 No dataset uploaded! Please upload an Excel file.")
    else:
        # Convert dataframe to JSON for LLM processing
        json_data = st.session_state.df.to_json(orient="records")
        print(json_data)
        with st.spinner(f"Processing request: *{input_text}*..."):
            response = None  # Ensure response is defined before use

            # Call the appropriate LLM provider
            if llm_provider == "OpenAI":
                response = llm_calls.query_openai(
                    system_prompt=SYSTEM_PROMPT,
                    user_prompt=input_text,
                    json_data=json_data,
                    openai_client=openai_client,
                )
            elif llm_provider == "Perplexity.ai":
                response = llm_calls.query_perplexity(
                    system_prompt=SYSTEM_PROMPT,
                    user_prompt=input_text,
                    json_data=json_data,
                    api_key=api_key,
                )

            print(f"Response: {response}")

        # Ensure the response exists before processing. Keep the current dataset
        # in place so a parsing failure does not wipe it; update_dataframe
        # replaces it on success.
        if response:
            try:
                parsed_response = validate_llm_response(response)
                print(f"Parsed response: {parsed_response}")

                st.session_state.last_prompt = input_text
                st.session_state.last_response = response  # Keep full JSON response

                # Display structured output
                if "error" in parsed_response:
                    st.error(parsed_response["error"])
                else:
                    print(f"Parsed data: {parsed_response['dataset']}")
                    update_dataframe(parsed_response["dataset"])
                    st.session_state.explanation = parsed_response["explanation"]
                    st.session_state.references = parsed_response["references"]
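                    # Record the interaction so it can be exported later to the
                    # "Metadata" sheet of the downloaded workbook.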
                    st.session_state.history.append({
                        "Prompt": input_text,
                        "Explanation": parsed_response["explanation"],
                        "References": parsed_response["references"]
                    })
            except json.JSONDecodeError:
                st.error("🚨 Error parsing response: Invalid JSON format.")
            except Exception as e:
                st.error(f"🚨 Unexpected error: {e}")

            st.rerun()


# πŸ“₯ Download Updated Excel
if st.session_state.df is not None:
    st.sidebar.subheader("Download Updated Dataset")

    def generate_excel(dataframe, history):
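        """Serialize the dataset (and the prompt history, if any) to an in-memory Excel file."""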
        output_stream = io.BytesIO()
        with pd.ExcelWriter(output_stream, engine="xlsxwriter") as writer:
            dataframe.to_excel(writer, index=False, sheet_name="Data")
            # Convert history to DataFrame and save in a new sheet
            if history:
                history_df = pd.DataFrame(history)
                history_df.to_excel(writer, index=False, sheet_name="Metadata")

            workbook = writer.book

            # Apply word wrapping
            for sheet_name in ["Data", "Metadata"]:
                if sheet_name in writer.sheets:
                    worksheet = writer.sheets[sheet_name]
                    wrap_format = workbook.add_format({"text_wrap": True, "align": "top", "valign": "top"})

                    # Apply word wrap to all columns
                    df_to_format = dataframe if sheet_name == "Data" else history_df
                    for col_num, col_name in enumerate(df_to_format.columns):
                        worksheet.set_column(col_num, col_num, 30, wrap_format)  # Adjust width if needed

        output_stream.seek(0)
        return output_stream


    st.sidebar.download_button(
        "πŸ“₯ Download Excel File",
        data=generate_excel(st.session_state.df, st.session_state.history),
        file_name="updated_dataset.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )