|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import marimo |
|
|
|
# Version string recorded for this notebook (presumably the marimo release
# that generated the file).
__generated_with = "0.11.6"

# Notebook application object; every cell below registers itself through
# the `app.cell` decorator.
app = marimo.App(width="medium")
|
|
|
|
|
@app.cell
def _():
    # Bind the marimo library to `mo`; the other cells receive it by that name.
    import marimo as mo

    return (mo,)
|
|
|
|
|
@app.cell
def _(mo):
    # Password-kind text field holding the Groq API key. The bare name on the
    # last line keeps the widget as the cell's final expression so that it is
    # displayed.
    groq_api_key = mo.ui.text(
        kind="password",
        label="Enter your groq api key here",
    )
    groq_api_key
    return (groq_api_key,)
|
|
|
|
|
@app.cell(hide_code=True)
def _(mo):
    # Static introduction: names the few-shot approach and shows the default
    # prompt. The text is kept in a cell-private variable and rendered as the
    # cell's final expression.
    _intro = r"""
        # LLM for NER

        - do a [Few-shot prompting](https://huggingface.co/docs/transformers/main/en/tasks/prompting#few-shot-prompting) and repeat multiple times.

        The default prompt is:
        ```python
        Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.
        The available entities are: "ADE", "Dosage", "Drug", "Duration", "Form", "Frequency", "Reason", "Route" and "Strength".

        For example:
        Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .
        Named Entities: <start> Lipitor (Drug: 0.87), Tylenol (Drug: 0.59) <end>

        Here is your task:
        Text: The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours.
        Named Entities:

        Remember to answer in the exact form of the example.
        ```
        """
    mo.md(_intro)
    return
|
|
|
|
|
@app.cell
def _(mo, models):
    # Experiment controls: repetition count, model choice, optional custom
    # text and tag list, and the checkbox that gates the API calls.
    call_groq_times = mo.ui.slider(
        start=10,
        stop=100,
        step=10,
        value=30,
        label="How many times do you want to call groq",
    )
    model_ner = mo.ui.dropdown(
        options=models,
        value="llama3-8b-8192",
        label="Choose a LLM",
    )
    ner_text = mo.ui.text_area(
        value="",
        label="Type your text here or leave it to default:",
    )
    ner_tags = mo.ui.text_area(
        value="",
        label="Type the ner tags here or leave it to default:",
        placeholder="e.g. Country, Person",
    )
    checkbox_ner = mo.ui.checkbox(label=" Whether to call groq api")

    # Layout: heading, then two side-by-side rows, then the checkbox.
    _heading = mo.md("# Experiment")
    _settings_row = mo.hstack([call_groq_times, model_ner])
    _inputs_row = mo.hstack([ner_text, ner_tags])
    mo.vstack(
        [_heading, _settings_row, _inputs_row, checkbox_ner],
        align="center",
    )
    return call_groq_times, checkbox_ner, model_ner, ner_tags, ner_text
|
|
|
|
|
@app.cell
def _(
    call_groq_times,
    chat_completion,
    checkbox_ner,
    client,
    extract_ner_from_assistant,
    mo,
    model_ner,
    prompt_ner,
):
    # Send the same prompt `call_groq_times.value` times and pool every parsed
    # entity record into `result`. Nothing is sent unless the checkbox is set.
    result = []
    # Bound up front so the `return` below never names an unassigned variable
    # (checkbox off, or the very first call failing). Otherwise it holds the
    # most recent raw reply.
    answer = None
    _failed = 0
    _last_error = None
    if checkbox_ner.value:
        for _ in mo.status.progress_bar(
            range(call_groq_times.value),
            title='In Progress …',
            completion_title='Finished.',
        ):
            try:
                answer = chat_completion(client, prompt_ner, model_ner.value)
                result.extend(extract_ner_from_assistant(answer))
            except Exception as _err:
                # Best effort: a failed request or unparsable reply must not
                # abort the remaining repetitions, but it is counted so the
                # failure does not disappear silently.
                _failed += 1
                _last_error = _err
    if _failed:
        print(
            f"{_failed} call(s) failed or could not be parsed; "
            f"last error: {_last_error!r}"
        )
    return answer, result
|
|
|
|
|
@app.cell
def _(mo, pd, result):
    # Tabulate the pooled entity records and wrap them in marimo's interactive
    # dataframe widget.
    data = pd.DataFrame.from_dict(result)

    # `df` is only loaded on the fallback path; bind it first so the `return`
    # below never names an unassigned variable when the primary path succeeds.
    df = None
    try:
        transformed_df = mo.ui.dataframe(data)
    except Exception:
        # Fallback when `data` cannot be wrapped: show the contents of
        # 'data.csv' instead.
        # NOTE(review): presumably this triggers when no results have been
        # collected yet; 'data.csv' must exist in the working directory.
        df = pd.read_csv('data.csv')
        transformed_df = mo.ui.dataframe(df)
    return data, df, transformed_df
|
|
|
|
|
@app.cell
def _(mo, transformed_df):
    # Caption rendered above the interactive results table.
    _caption = mo.md(
        r"""
        The results are shown below, use __+ Add__ to apply different transforms and explore more:
        ------
        """
    )
    _stacked = [_caption, transformed_df]
    mo.vstack(_stacked)
    return
|
|
|
|
|
@app.cell
async def _():
    # Imports shared with the other cells, plus runtime installation of the
    # Groq client.
    # NOTE(review): micropip is presumably only available under Pyodide/WASM;
    # confirm before running this notebook in a regular Python environment.
    import os
    from functools import reduce

    import pandas as pd
    import micropip

    # Install one package at a time, in this order, before importing groq.
    for _package in ("ssl", "groq"):
        await micropip.install(_package)

    from groq import Groq

    return Groq, micropip, os, pd, reduce
|
|
|
|
|
@app.cell
def _():
    # Model identifiers offered as the options of the model dropdown.
    # NOTE(review): confirm these IDs are still served by the Groq API.
    models = [
        "llama3-8b-8192",
        "llama3-70b-8192",
        "llama2-70b-4096",
        "mixtral-8x7b-32768",
        "gemma-7b-it",
    ]
    return (models,)
|
|
|
|
|
@app.cell
def _(Groq, groq_api_key):
    # Build the API client from whatever the key field currently contains.
    _key = groq_api_key.value
    client = Groq(api_key=_key)
    return (client,)
|
|
|
|
|
@app.cell
def _():
    # Defaults used when the text / tag inputs are left empty.
    default_sentence = "The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours."
    # Every tag must be its own list element: writing `"Route" and "Strength"`
    # is a boolean expression that evaluates to "Strength" alone, which drops
    # "Route" from the list.
    default_tags = [
        "ADE",
        "Dosage",
        "Drug",
        "Duration",
        "Form",
        "Frequency",
        "Reason",
        "Route",
        "Strength",
    ]
    return default_sentence, default_tags
|
|
|
|
|
@app.cell
def _(default_sentence, default_tags, ner_tags, ner_text):
    # Build the few-shot prompt from the user's inputs, falling back to the
    # defaults when an input is empty or only whitespace.

    # Tags: split the comma-separated input, trim each tag and drop blanks so
    # "Country, Person" yields "Country" and "Person" (no leading space).
    _tags = [_t.strip() for _t in ner_tags.value.split(",")] if ner_tags.value else []
    _tags = [_t for _t in _tags if _t] or list(default_tags)

    # Render the tags as quoted, comma-separated words with "and" before the
    # last one, matching the prompt documented in the introduction cell
    # rather than a Python list repr.
    _quoted = [f'"{_t}"' for _t in _tags]
    if len(_quoted) > 1:
        _tag_text = f"{', '.join(_quoted[:-1])} and {_quoted[-1]}"
    else:
        _tag_text = "".join(_quoted)

    # Text: keep the user's text exactly as typed when it has any content.
    _text = ner_text.value if ner_text.value.strip() else default_sentence

    # Assembled line by line so the prompt does not depend on how this source
    # file is indented. The trailing "" keeps the final newline.
    prompt_ner = "\n".join(
        [
            "Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.",
            f"The available entities are: {_tag_text}.",
            "",
            "For example:",
            "Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .",
            "Named Entities: <start> Lipitor (Drug: 0.87), Tylenol (Drug: 0.59) <end>",
            "",
            "Here is your task:",
            f"Text: {_text}",
            "Named Entities:",
            "",
            "Remember to answer in the exact form of the example.",
            "",
        ]
    )
    prompt_ner
    return (prompt_ner,)
|
|
|
|
|
@app.cell
def _():
    def chat_completion(
        client,
        prompt,
        model,
        *,
        temperature=0.5,
        max_tokens=100,
        stop='<end>',
    ):
        """Send one system + user exchange and return the reply text.

        Args:
            client: Object exposing ``chat.completions.create`` (the Groq
                client built in another cell).
            prompt: Text sent as the user message.
            model: Model identifier forwarded to the API.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Reply length limit forwarded to the API.
            stop: Stop sequence forwarded to the API; defaults to the
                ``<end>`` marker used in the prompt's example.

        Returns:
            The ``content`` of the first choice's message.
        """
        messages = [
            {"role": "system", "content": "you will help me with some NER tasks."},
            {"role": "user", "content": prompt},
        ]
        completion = client.chat.completions.create(
            messages=messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            stop=stop,
            stream=False,
        )
        return completion.choices[0].message.content

    return (chat_completion,)
|
|
|
|
|
@app.cell
def _(reduce):
    # `reduce` is no longer used by the parser; the parameter is kept so the
    # cell's signature is unchanged.
    def extract_ner_from_assistant(answer: str) -> list[dict]:
        """Parse a reply shaped like ``<start> Entity (Tag: 0.87), ... <end>``.

        Only the text after the first ``<start>`` (and before ``<end>``, if
        present) is read. It is split on commas, and each piece is expected
        to look like ``Entity words (Tag: score)``.

        Pieces that do not fit that shape are skipped instead of aborting
        the whole reply. A reply without ``<start>`` yields an empty list.

        Args:
            answer: Raw text returned by the model.

        Returns:
            One dict per parsed entity with the keys ``"named entity"``,
            ``"tag"`` and ``"score"`` (a float), in reply order.
        """
        if not answer:
            return []
        _, start_marker, payload = answer.partition("<start>")
        if not start_marker:
            return []
        # Drop the closing marker and anything after it, if the model sent it.
        payload = payload.partition("<end>")[0]

        records = []
        for chunk in payload.split(","):
            # Split at the last "(" so entities may themselves contain
            # parentheses: "Entity words" | "Tag: 0.87)".
            entity_part, paren, label_part = chunk.strip().rpartition("(")
            if not paren or not label_part.endswith(")"):
                continue
            tag, colon, score_text = label_part[:-1].rpartition(":")
            if not colon:
                continue
            try:
                score = float(score_text)
            except ValueError:
                continue
            records.append(
                {
                    # Collapse runs of whitespace inside the entity text.
                    "named entity": " ".join(entity_part.split()),
                    "tag": tag.strip(),
                    "score": score,
                }
            )
        return records

    return (extract_ner_from_assistant,)
|
|
|
|
|
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|
|