Spaces:

omri374
/

med-copilot

Sleeping

App Files Files Community

omri374 committed on Apr 6

Commit

8f78c8f

verified ·

1 Parent(s): c39b19c

Upload 5 files

Browse files

Files changed (5) hide show

src/data_handler.py +39 -0
src/gradio_utils.py +191 -0
src/llm_calls.py +165 -0
src/mocks.py +96 -0
src/parse_response.py +61 -0

src/data_handler.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import tempfile
+import io
+from typing import List
+import pandas as pd
+import base64
def generate_excel_base64(dataframe: pd.DataFrame) -> str:
    """Serialize a data frame to an in-memory .xlsx workbook and base64-encode it.

    The frame is written without its index to a single sheet named "Data"
    using the ``xlsxwriter`` engine.

    Args:
        dataframe: Table to export.

    Returns:
        The workbook bytes encoded as a base64 text string.
    """
    buffer = io.BytesIO()
    excel_writer = pd.ExcelWriter(buffer, engine="xlsxwriter")
    with excel_writer:
        dataframe.to_excel(excel_writer, sheet_name="Data", index=False)
    # getvalue() returns the whole buffer regardless of the stream position.
    workbook_bytes = buffer.getvalue()
    encoded = base64.b64encode(workbook_bytes)
    return encoded.decode("utf-8")
def generate_excel(dataframe: pd.DataFrame) -> str:
    """Write a data frame to a temporary .xlsx file and return the file's path.

    The workbook is first rendered in memory (sheet "Data", no index column,
    ``xlsxwriter`` engine) and then copied into a named temporary file that is
    NOT deleted automatically; removing it is left to the caller.

    Args:
        dataframe: Table to export.

    Returns:
        Path of the temporary ``.xlsx`` file.
    """
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as excel_writer:
        dataframe.to_excel(excel_writer, sheet_name="Data", index=False)
    workbook_bytes = buffer.getvalue()

    # delete=False keeps the file on disk after it is closed.
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    try:
        tmp_file.write(workbook_bytes)
    finally:
        tmp_file.close()
    return tmp_file.name

src/gradio_utils.py ADDED Viewed

	@@ -0,0 +1,191 @@

+import pandas as pd
+from src.llm_calls import llm_extract_table
+from src.parse_response import extract_and_return_data_table
+import gradio as gr
def __update_df_state(df_before, df_state, updated_df):
    """Record a new table version and return the refreshed undo/redo state.

    Args:
        df_before: Undo history (list of earlier states), or ``None`` when
            there is no history yet.
        df_state: The state that is current before this update; it is
            appended to the undo history as-is.
        updated_df: Data for the new current table (anything accepted by
            ``pd.DataFrame``).

    Returns:
        Tuple ``(undo_history, new_state, redo_history)``. The redo history
        is always empty because a fresh change invalidates it.
    """
    # Build the frame first so an invalid ``updated_df`` fails before any
    # history is assembled.
    current = pd.DataFrame(updated_df).copy()
    history = [df_state] if df_before is None else df_before + [df_state]
    return history, current, []
def extract_table_from_chat(
    chat_output, df_before, df_state, df_after, llm_type, api_key, key="Medications"
):
    """Extract a table from the chat and make it the current data frame.

    Rule-based JSON parsing is tried first. If that fails, the chat output is
    sent to the LLM to be reformatted as JSON and parsing is retried.

    Args:
        chat_output: Chat history or message text to parse.
        df_before: Undo history (list of earlier states) or ``None``.
        df_state: Current data frame state.
        df_after: Redo history (unused; a successful extraction clears it).
        llm_type: LLM backend used for the fallback extraction.
        api_key: API key for the selected backend.
        key: Top-level JSON key that holds the table rows.

    Returns:
        Tuple ``(table to display, undo history, new state, redo history,
        update enabling the undo button, update disabling the redo button)``.

    Raises:
        gr.Error: If neither the rule-based nor the LLM-based extraction
            yields a table. Nothing is returned in that case, so the table
            and its history are not replaced.
    """
    try:
        updated_df = extract_and_return_data_table(chat_output=chat_output, key=key)
    except (ValueError, KeyError):
        # Rule-based parsing failed (no JSON, invalid JSON, or missing key):
        # ask the LLM to restructure the chat output and try once more.
        try:
            json_str = llm_extract_table(chat_output, llm_type, api_key)
            updated_df = extract_and_return_data_table(chat_output=json_str, key=key)
        except (KeyError, ValueError) as err:
            # gr.Error is an exception: it is only shown to the user when it
            # is raised, not when it is merely instantiated.
            raise gr.Error(
                "Cannot extract table information from chat. "
                "Please ask the LLM to provide the dataset in JSON format.",
                duration=None,
            ) from err

    new_df_before, new_df_state, new_df_after = __update_df_state(
        df_before, df_state, updated_df
    )
    return (
        new_df_state,
        new_df_before,
        new_df_state,
        new_df_after,
        gr.update(interactive=True),
        gr.update(interactive=False),
    )
def update_llm_selection(selected_llm):
    """Return a component update relabelling the API-key field for the chosen LLM.

    Args:
        selected_llm: Either ``"OpenAI"`` or ``"Perplexity"``.

    Returns:
        A ``gr.update`` carrying the label and placeholder for that provider.

    Raises:
        ValueError: If ``selected_llm`` is neither supported value.
    """
    if selected_llm not in ("OpenAI", "Perplexity"):
        raise ValueError("Invalid LLM type selected.")
    if selected_llm == "Perplexity":
        return gr.update(
            label="Perplexity API Key", placeholder="Enter Perplexity API Key"
        )
    return gr.update(label="OpenAI API Key", placeholder="Enter OpenAI API Key")
def edit_or_save_changes(updated_df, df_before, df_state, df_after, current_edit_mode):
    """Toggle between the "Save" and "Edit" modes and snapshot the table.

    The previous state is pushed onto the undo history, the redo history is
    cleared and the displayed data becomes the new state.

    Args:
        updated_df: Table data as currently shown to the user.
        df_before: Undo history (list of earlier states) or ``None``.
        df_state: Current state, or ``None`` when no table exists yet.
        df_after: Redo history (unused; it is always cleared).
        current_edit_mode: ``"Save"`` or ``"Edit"`` - the mode being left.

    Returns:
        Tuple ``(table to display, undo history, new state, redo history,
        undo-button update, redo-button update, table-display update,
        edit-button update, new edit mode)``.

    Raises:
        ValueError: If ``current_edit_mode`` is neither ``"Save"`` nor
            ``"Edit"``.
    """
    # Validate first so an invalid mode is reported as such, regardless of
    # the state arguments.
    if current_edit_mode not in ("Save", "Edit"):
        raise ValueError(f"Wrong edit mode selected: {current_edit_mode}. ")

    new_df = pd.DataFrame(updated_df)
    # Tolerate a missing history, as __update_df_state does, and never push a
    # non-existent state.
    new_df_before = list(df_before) if df_before else []
    if df_state is not None:
        new_df_before.append(df_state.copy())
    new_df_after = []  # Clear redo history
    new_df_state = new_df.copy()

    if current_edit_mode == "Save":
        # User wants to move from save to edit
        return (
            new_df,
            new_df_before,
            new_df_state,
            new_df_after,
            gr.update(
                interactive=True
            ),  # prev button is now enabled as there was a change
            gr.update(interactive=False),  # next button
            gr.update(interactive=True),  # df display
            gr.update(value="Edit"),  # edit button
            "Edit",
        )
    return (
        new_df_state,
        new_df_before,
        new_df_state,
        new_df_after,
        gr.update(interactive=False),  # prev button
        gr.update(interactive=False),  # next button
        gr.update(interactive=True),  # df display
        gr.update(value="Save"),  # edit button
        "Save",
    )
def undo(df_before, df_state, df_after):
    """Step one entry back in the edit history.

    Args:
        df_before: Undo history; its last item is the state to restore.
        df_state: Current state; a copy is pushed onto the redo history.
        df_after: Redo history.

    Returns:
        Tuple ``(table to display, undo history, state, redo history,
        undo-button update, redo-button update)``. When there is nothing to
        undo, the inputs are returned unchanged and the undo button is
        disabled.
    """
    if df_before:
        redo_stack = df_after + [df_state.copy()]
        restored, undo_stack = df_before[-1], df_before[:-1]
        can_undo = len(undo_stack) > 0
        can_redo = len(redo_stack) > 0
        return (
            restored,
            undo_stack,
            restored,
            redo_stack,
            gr.update(interactive=can_undo),  # prev button
            gr.update(interactive=can_redo),  # next button
        )

    # Nothing to undo: keep everything as it is.
    can_redo = len(df_after) > 0
    return (
        df_state,
        df_before,
        df_state,
        df_after,
        gr.update(interactive=False),
        gr.update(interactive=can_redo),
    )
def redo(df_before, df_state, df_after):
    """Step one entry forward in the edit history.

    Args:
        df_before: Undo history.
        df_state: Current state, or ``None`` when no table is present.
        df_after: Redo history; its last item is the state to restore.

    Returns:
        Tuple ``(table to display, undo history, state, redo history,
        undo-button update, redo-button update)``. When there is nothing to
        redo, the inputs are returned unchanged and the redo button is
        disabled.
    """
    if not df_after:
        return (
            df_state,
            df_before,
            df_state,
            df_after,
            gr.update(interactive=(len(df_before) > 0)),
            gr.update(interactive=False),
        )

    # A missing state is recorded as None so a later undo returns to exactly
    # that state. Substituting the whole redo list for the state (as before)
    # pushed a list, not a table, onto the undo history.
    snapshot = df_state.copy() if df_state is not None else None
    new_df_before = df_before + [snapshot]
    new_df_state = df_after[-1]
    new_df_after = df_after[:-1]
    return (
        new_df_state,
        new_df_before,
        new_df_state,
        new_df_after,
        gr.update(interactive=(len(new_df_before) > 0)),
        gr.update(interactive=(len(new_df_after) > 0)),
    )
+# def toggle_save_edit(button_state, dataframe_display):
+#     if button_state == "Edit":
+#         return "Save", gr.update(interactive=True), gr.update(interactive=True)  # Enable DataFrame editing
+#     else:
+#         return "Edit", gr.update(interactive=True), gr.update(interactive=False)  # Disable DataFrame editing
+#
def upload_file(file, df_before, df_state, df_after):
    """Load an uploaded Excel file and make it the current data frame.

    Args:
        file: Uploaded file - either an object exposing the path as ``.name``
            or the path itself - or ``None`` when nothing was uploaded.
        df_before: Undo history (list of earlier states) or ``None``.
        df_state: Current data frame state.
        df_after: Redo history.

    Returns:
        Tuple ``(table to display, undo history, new state, redo history,
        undo-button update, redo-button update)``. When ``file`` is ``None``
        the same six-item shape is returned with the state values unchanged
        and no-op component updates.
    """
    if file is None:
        # Keep the output arity identical to the success path; returning a
        # single update would not match the six outputs produced below.
        return (
            gr.update(),
            df_before,
            df_state,
            df_after,
            gr.update(),
            gr.update(),
        )

    # Accept both file-like wrappers (with .name) and plain path strings.
    path = getattr(file, "name", file)
    df = pd.read_excel(path, engine="openpyxl")
    new_df_before, new_df_state, new_df_after = __update_df_state(
        df_before, df_state, df
    )
    return (
        new_df_state,
        new_df_before,
        new_df_state,
        new_df_after,
        gr.update(interactive=False),
        gr.update(interactive=False),
    )

src/llm_calls.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import json
+import os
+from typing import Generator, List, Optional
+import pandas as pd
+import requests
+from dotenv import load_dotenv
+from openai import OpenAI
+load_dotenv()
def query_llm(
    messages,
    history: List,
    df: Optional[pd.DataFrame],
    llm_type: str,
    api_key: str,
    system_prompt: str,
) -> Generator[str, None, None]:
    """Chat function that streams responses using an LLM API.

    Args:
        messages (str or list): User input message(s).
        history (list): Conversation history; only the last two entries are
            forwarded to the model.
        df (pd.DataFrame): A representation of the data already obtained,
            or None.
        llm_type (str): Backend to use, either "OpenAI" or "Perplexity".
        api_key (str): API key for the backend. When empty, the key is read
            from OPENAI_API_KEY / PERPLEXITY_API_KEY in the environment.
        system_prompt (str): The system prompt.

    Yields:
        str: Whatever the selected backend generator yields, or a single
        explanatory message when the LLM type is unsupported or no API key
        is available.
    """
    if llm_type not in ("OpenAI", "Perplexity"):
        yield "Unsupported LLM type. Please choose either 'OpenAI' or 'Perplexity'."
        return

    if not api_key:
        env_var = "OPENAI_API_KEY" if llm_type == "OpenAI" else "PERPLEXITY_API_KEY"
        api_key = os.environ.get(env_var)
    if not api_key:
        # Stop here: calling the backend without a key cannot succeed.
        yield "No API key provided for the selected LLM type."
        return

    if isinstance(messages, str):
        messages = [{"role": "user", "content": messages}]

    # Extract last 2 messages from history (if available)
    recent_history = history[-2:] if history else []

    # Build message history (prepend system prompt)
    full_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Past interactions: {recent_history}"},
        {
            "role": "assistant",
            "content": f"Dataset: {df.to_json() if df is not None else {}}",
        },
    ] + list(messages)

    if llm_type == "Perplexity":
        yield from query_perplexity(full_messages, api_key=api_key)
    else:
        yield from query_openai(full_messages, api_key=api_key)
def query_perplexity(
    full_messages,
    api_key: str,
    url="https://api.perplexity.ai/chat/completions",
    model="sonar-pro",
    timeout=(10, 300),
):
    """Query the Perplexity AI API and stream the response.

    Args:
        full_messages (list): List of messages in the conversation.
        api_key (str): Perplexity API key.
        url (str): API endpoint URL.
        model (str): Model to use for the query.
        timeout: Timeout passed to ``requests.post`` - a number of seconds or
            a ``(connect, read)`` tuple - so a stalled connection cannot
            block forever.

    Yields:
        str: The ``choices[0].message.content`` value of every streamed
        chunk that carries one; an error description for a data line that
        is not valid JSON; or a single failure message when the HTTP status
        is not 200.
    """
    payload = {
        "model": model,
        "messages": full_messages,
        "stream": True,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    with requests.post(
        url, json=payload, headers=headers, stream=True, timeout=timeout
    ) as response:
        if response.status_code != 200:
            yield f"API request failed with status code {response.status_code}, details: {response.text}"
            return

        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8").strip()
            if line.startswith(":"):
                # Server-sent-event comment / keep-alive line, not data.
                continue
            if line.startswith("data: "):
                line = line[len("data: ") :]  # Remove "data: " prefix
            if line == "[DONE]":
                # End-of-stream sentinel used by OpenAI-style streams.
                break
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                yield f"Error decoding JSON: {line}"
                continue
            choices = data.get("choices") if isinstance(data, dict) else None
            if not choices:
                continue
            content = (choices[0].get("message") or {}).get("content")
            if content is not None:
                yield content
def query_openai(
    full_messages, api_key: str, model: str = "gpt-4-turbo"
) -> Generator[str, None, None]:
    """Chat function that streams responses using OpenAI API.

    Args:
        full_messages (list): List of messages in the conversation.
        api_key (str): OpenAI API key.
        model (str): Chat-completion model to use.

    Yields:
        str: The response text accumulated so far. Each item contains all
        previous content plus the newest chunk, so the last item is the
        complete answer.
    """
    openai_client = OpenAI(api_key=api_key)
    response = openai_client.chat.completions.create(
        model=model,
        messages=full_messages,
        stream=True,  # Enable streaming
    )
    llm_response = ""
    for chunk in response:
        # Skip chunks without choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            llm_response += piece
            yield llm_response
def llm_extract_table(chat_output, llm_type, api_key) -> str:
    """Ask the LLM to restructure chat output into a JSON dataset.

    Args:
        chat_output (str or list): Chat text or messages that should contain
            a dataset in JSON or markdown form.
        llm_type (str): Backend passed on to ``query_llm``.
        api_key (str): API key passed on to ``query_llm``.

    Returns:
        str: The final text produced by the LLM, stripped of surrounding
        whitespace (empty if nothing was produced).
    """
    system_prompt = """
    You are a pharmacology assistant specialized in analyzing and structuring medical data.
    Your role is to extract information in either markdown, JSON or text, and turn it structured information.
    You will be given output from a conversation with an LLM. This conversation should have a dataset formatted
    as either json or markdown. Extract the dataset and return a JSON object.
    The dataset should be a JSON object with a dict per medication, with the following format:
    ```json
    {
        "Medications": [
            {"Name": "Medication Name", "key1": "value1", "key2": "value2",..},
            {"Name": "Medication Name", "key1": "value1", "key2": "value2",..}
        ]
    }
    Guidelines:
    - Make sure the response contains only a valid JSON
    - Avoid adding text before or after
    """
    response = query_llm(
        messages=chat_output,
        history=None,
        df=None,
        llm_type=llm_type,
        api_key=api_key,
        system_prompt=system_prompt,
    )
    # The stream yields the text accumulated so far on every step (see
    # query_openai), so only the last item is the complete answer. Joining
    # all items would repeat every partial prefix.
    final_text = ""
    for partial in response:
        final_text = partial
    return final_text.strip()

src/mocks.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import json
+import random
+from typing import List, Optional
+import pandas as pd
def get_current_df(dfs: List[pd.DataFrame], current: int) -> pd.DataFrame:
    """Return ``dfs[current]``, or a new empty data frame when ``dfs`` is empty."""
    return dfs[current] if len(dfs) != 0 else pd.DataFrame()
def query_llm_mock(
    messages,
    history: List,
    df: pd.DataFrame,
    llm_type: str,
    api_key: str,
    system_prompt: str,
):
    """Mock chat function that yields one canned response.

    All arguments are accepted for signature compatibility with the real
    chat function and are ignored.

    Args:
        messages (str or list): User input message(s).
        history (list): Conversation history.
        df (pd.DataFrame): A representation of the data already obtained.
        llm_type (str): Selected LLM backend.
        api_key (str): API key.
        system_prompt (str): The system prompt.

    Yields:
        str: A single message with a fenced ``json`` block. The block holds
        five mock medications (each with a random "Random" value) or, when
        a random draw is not above 0.5, is empty.
    """
    medication_names = (
        "Tamsulosin",
        "Metoprolol",
        "Bromocriptine",
        "Reserpine",
        "Rasagiline",
    )
    medications = [
        {"Medication name": name, "Passes_RBB": "Yes", "Random": random.random()}
        for name in medication_names
    ]
    mock_json = json.dumps({"Medications": medications})
    # Randomly leave the fenced block empty.
    fenced_payload = mock_json if random.random() > 0.5 else ""
    yield (
        "Good question!\n"
        "Here's the data frame in JSON format:\n"
        "```json\n"
        f"{fenced_payload}\n"
        "```\n\n"
        "Hope this is useful."
    )
def llm_extract_table_mock(chat_output, llm_type, api_key) -> str:
    """Mock table extraction that returns a fixed two-row JSON dataset.

    The arguments are accepted for signature compatibility and ignored.

    Returns:
        str: JSON text with a "Medications" list of two placeholder rows,
        each carrying a fresh random "Random" value.
    """
    rows = [
        {
            "Name": "Medication Name",
            "key1": "value1",
            "key2": "value2",
            "Random": random.random(),
        }
        for _ in range(2)
    ]
    return json.dumps({"Medications": rows})

src/parse_response.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import re
+import json
+from typing import Optional
+import pandas as pd
+import simplejson
def json_to_dict(response: str) -> dict:
    """Convert a JSON string to a Python dictionary.

    The JSON may be embedded in surrounding text. Three strategies are tried
    in order:

    1. Parse the span from the first ``{`` to the last ``}``.
    2. Parse the whole response as a JSON document.
    3. Parse the first complete JSON object that starts at any ``{``; this
       copes with trailing text that itself contains braces.

    Args:
        response (str): Text containing the JSON to convert.

    Returns:
        dict: Parsed JSON as a dictionary. Strategy 2 returns whatever the
        document decodes to.

    Raises:
        ValueError: If ``response`` is not a string, contains no ``{...}``
            span, or none of the strategies yields valid JSON.
    """
    if not isinstance(response, str):
        raise ValueError("No valid JSON found in the response.")

    match = re.search(r"\{.*}", response, re.DOTALL)
    if match is None:
        raise ValueError("No valid JSON found in the response.")

    try:
        return json.loads(match.group())
    except json.JSONDecodeError:
        pass

    try:
        return json.loads(response)
    except json.JSONDecodeError as whole_error:
        # Keep the error for the final message; the name bound by "as" is
        # cleared when the except block ends.
        last_error = whole_error

    # The greedy span above breaks when text after the JSON contains "}".
    # raw_decode stops at the end of the first complete object instead.
    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", response):
        try:
            parsed, _ = decoder.raw_decode(response, brace.start())
        except json.JSONDecodeError:
            continue
        return parsed

    raise ValueError(f"Invalid JSON response: {last_error}") from last_error
def json_to_pandas(json_data: str, key: Optional[str] = None) -> pd.DataFrame:
    """Convert JSON data to a pandas DataFrame.

    Args:
        json_data: Text containing the JSON to convert.
        key: Optional top-level key; when given (and truthy), only the value
            stored under it is turned into the frame.

    Returns:
        The resulting data frame.

    Raises:
        ValueError: If parsing the JSON or building the frame raises a
            ``ValueError``; the original message is embedded.
    """
    try:
        parsed = json_to_dict(json_data)
        records = parsed[key] if key else parsed
        return pd.DataFrame(records)
    except ValueError as e:
        raise ValueError(f"Invalid JSON data: {e}")
def extract_and_return_data_table(chat_output, key="Medications"):
    """Extract a pandas data frame out of the chat using rule-based parsing.

    Args:
        chat_output: Either the message text itself or a chat history (a
            sequence of messages). For a history, the text is taken from the
            last message - its ``"content"`` entry if it is a dict, or its
            ``content`` attribute if it has one.
        key: Top-level JSON key that holds the table rows.

    Returns:
        pd.DataFrame: The table parsed from the message text.

    Raises:
        ValueError: If no message text can be found or it does not contain
            valid JSON. Callers can catch this to fall back to LLM-based
            extraction.
    """
    if chat_output and not isinstance(chat_output, str):
        last_message = chat_output[-1]
        if isinstance(last_message, dict):
            chat_output = last_message.get("content", chat_output)
        elif hasattr(last_message, "content"):
            chat_output = last_message.content

    if not isinstance(chat_output, str):
        # Report unusable input as ValueError, the error callers handle,
        # rather than letting a TypeError escape from the parser.
        raise ValueError("Chat output does not contain a text message to parse.")

    return json_to_pandas(chat_output, key=key)