# NOTE: the lines above this module originally contained Hugging Face Spaces
# page residue ("Spaces: Sleeping") from a web scrape — not part of the code.
import json | |
import os | |
from typing import Generator, List, Optional | |
import pandas as pd | |
import requests | |
from dotenv import load_dotenv | |
from openai import OpenAI | |
load_dotenv() | |
def query_llm(
    messages,
    history: List,
    df: Optional[pd.DataFrame],
    llm_type: str,
    api_key: str,
    system_prompt: str,
) -> Generator[str, None, None]:
    """Stream responses from the selected LLM backend.

    Args:
        messages (str or list): User input message(s). A bare string is
            wrapped into a single user message.
        history (list): Conversation history; only the last two entries
            are forwarded to the model.
        df (pd.DataFrame, optional): Data already obtained; serialized to
            JSON and injected as assistant context.
        llm_type (str): Either "OpenAI" or "Perplexity".
        api_key (str): API key; falls back to the matching environment
            variable when empty.
        system_prompt (str): The system prompt.

    Yields:
        str: Chunks of the assistant's response, or an error message.
    """
    # Fall back to environment variables when no key was supplied.
    if not api_key:
        if llm_type == "OpenAI":
            api_key = os.environ.get("OPENAI_API_KEY")
        elif llm_type == "Perplexity":
            api_key = os.environ.get("PERPLEXITY_API_KEY")
    if not api_key:
        # Covers both an unsupported llm_type and a missing env var.
        # The original fell through after yielding and crashed on
        # len(None) in the debug print below.
        yield "No API key provided for the selected LLM type."
        return
    print(f"LLM Type: {llm_type}, API Key len: {len(api_key)}")  # Debugging
    if isinstance(messages, str):
        messages = [{"role": "user", "content": messages}]
    # Keep only the last 2 turns of history to bound prompt size.
    history = history[-2:] if history else []
    # Build message history (prepend system prompt and dataset context).
    full_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Past interactions: {history}"},
        {
            "role": "assistant",
            "content": f"Dataset: {df.to_json() if df is not None else {}}",
        },
    ] + messages
    if llm_type == "Perplexity":
        yield from query_perplexity(full_messages, api_key=api_key)
    elif llm_type == "OpenAI":
        yield from query_openai(full_messages, api_key=api_key)
    else:
        yield "Unsupported LLM type. Please choose either 'OpenAI' or 'Perplexity'."
def query_perplexity(
    full_messages,
    api_key: str,
    url="https://api.perplexity.ai/chat/completions",
    model="sonar-pro",
):
    """Stream a response from the Perplexity AI chat-completions API.

    Args:
        full_messages (list): List of messages in the conversation.
        api_key (str): Perplexity API key.
        url (str): API endpoint URL.
        model (str): Model to use for the query.

    Yields:
        str: Content chunks parsed from the SSE stream, or an error message.
    """
    payload = {
        "model": model,
        "messages": full_messages,
        "stream": True,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    # timeout keeps a dead connection from hanging the generator forever.
    with requests.post(
        url, json=payload, headers=headers, stream=True, timeout=60
    ) as response:
        if response.status_code != 200:
            yield (
                f"API request failed with status code {response.status_code},"
                f" details: {response.text}"
            )
            return
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8").strip()
            if line.startswith("data: "):
                line = line[len("data: ") :]  # Remove "data: " prefix
            if line == "[DONE]":
                # SSE end-of-stream sentinel — not JSON; the original fed it
                # to json.loads and yielded a spurious decode error.
                break
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                yield f"Error decoding JSON: {line}"
                continue
            if "choices" in data and len(data["choices"]) > 0:
                # NOTE(review): reads ["message"]["content"] per chunk, as the
                # original did — confirm against Perplexity's stream schema
                # (delta vs. cumulative message) before changing.
                yield data["choices"][0]["message"]["content"]
def query_openai(full_messages, api_key: str) -> Generator[str, None, None]:
    """Stream a chat response from the OpenAI API.

    Args:
        full_messages (list): List of messages in the conversation.
        api_key (str): OpenAI API key.

    Yields:
        str: Content deltas as they arrive from the stream.
    """
    openai_client = OpenAI(api_key=api_key)
    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=full_messages,
        stream=True,  # Enable streaming
    )
    # Yield each delta as it arrives. The original buffered the whole
    # response and yielded once at the end, so nothing actually streamed;
    # per-delta yields also match query_perplexity and keep
    # "".join(response) in llm_extract_table correct.
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta
def llm_extract_table(chat_output, llm_type, api_key) -> str:
    """Extract a structured medication dataset from chat output as a JSON string.

    Args:
        chat_output (str): Prior LLM conversation output expected to contain
            a dataset formatted as JSON or markdown.
        llm_type (str): "OpenAI" or "Perplexity".
        api_key (str): API key for the chosen backend.

    Returns:
        str: The extracted dataset as a JSON string.
    """
    # The ```json fence below was left unclosed in the original prompt,
    # which could confuse the model about where the example ends.
    system_prompt = """
    You are a pharmacology assistant specialized in analyzing and structuring medical data.
    Your role is to extract information in either markdown, JSON or text, and turn it structured information.
    You will be given output from a conversation with an LLM. This conversation should have a dataset formatted
    as either json or markdown. Extract the dataset and return a JSON object.
    The dataset should be a JSON object with a dict per medication, with the following format:
    ```json
    {
    "Medications": [
        {"Name": "Medication Name", "key1": "value1", "key2": "value2",..},
        {"Name": "Medication Name", "key1": "value1", "key2": "value2",..}
    ]
    }
    ```
    Guidelines:
    - Make sure the response contains only a valid JSON
    - Avoid adding text before or after
    """
    response = query_llm(
        messages=chat_output,
        history=None,
        df=None,
        llm_type=llm_type,
        api_key=api_key,
        system_prompt=system_prompt,
    )
    json_str = "".join(response).strip()
    # Models often wrap JSON in markdown fences despite instructions;
    # strip them so the caller receives bare JSON text.
    if json_str.startswith("```"):
        json_str = json_str.strip("`").strip()
        if json_str.startswith("json"):
            json_str = json_str[len("json") :].strip()
    return json_str