sample1 / app.py
Haleshot's picture
push new demo
b7db177 unverified
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "groq==0.18.0",
# "pandas==2.2.3",
# "marimo",
# ]
# ///
import marimo
# Version string written by the marimo code generator
# (presumably the marimo release that last saved this notebook).
__generated_with = "0.11.6"
# The notebook application object; every cell below registers itself on it
# through the @app.cell decorator.
app = marimo.App(width="medium")
@app.cell
def _():
    # Expose marimo under the short alias `mo`, which the other cells
    # receive as a parameter.
    import marimo as mo

    return (mo,)
@app.cell
def _(mo):
    # Password-style text input for the Groq API key. The bare expression
    # below makes the widget this cell's displayed output.
    groq_api_key = mo.ui.text(
        kind='password',
        label='Enter your groq api key here',
    )
    groq_api_key
    return (groq_api_key,)
@app.cell(hide_code=True)
def _(mo):
    # Static introduction: explains the few-shot NER approach and shows the
    # default prompt. The markdown object is this cell's displayed output.
    _intro = mo.md(
        r"""
# LLM for NER
- do a [Few-shot prompting](https://huggingface.co/docs/transformers/main/en/tasks/prompting#few-shot-prompting) and repeat multiple times.
The default prompt is:
```python
Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.
The available entities are: "ADE", "Dosage", "Drug", "Duration", "Form", "Frequency", "Reason", "Route" and "Strength".
For example:
Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .
Named Entities: <start> Lipitor (Drug: 0.87), Tylenol (Drug: 0.59) <end>
Here is your task:
Text: The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours.
Named Entities:
Remember to answer in the exact form of the example.
```
"""
    )
    _intro
    return
@app.cell
def _(mo, models):
    # Experiment controls: number of API calls, model choice, optional
    # custom text and tag list, and the checkbox that gates the API calls.
    call_groq_times = mo.ui.slider(
        start=10,
        stop=100,
        step=10,
        value=30,
        label='How many times do you want to call groq',
    )
    model_ner = mo.ui.dropdown(
        options=models,
        value="llama3-8b-8192",
        label="Choose a LLM",
    )
    ner_text = mo.ui.text_area(
        value="",
        label='Type your text here or leave it to default:',
    )
    ner_tags = mo.ui.text_area(
        value="",
        label="Type the ner tags here or leave it to default:",
        placeholder='e.g. Country, Person',
    )
    checkbox_ner = mo.ui.checkbox(label=' Whether to call groq api')

    # Stack the heading and the widget rows; the resulting layout is this
    # cell's displayed output.
    _rows = [
        mo.md("# Experiment"),
        mo.hstack([call_groq_times, model_ner]),
        mo.hstack([ner_text, ner_tags]),
        checkbox_ner,
    ]
    mo.vstack(_rows, align='center')
    return call_groq_times, checkbox_ner, model_ner, ner_tags, ner_text
@app.cell
def _(
    call_groq_times,
    chat_completion,
    checkbox_ner,
    client,
    extract_ner_from_assistant,
    mo,
    model_ner,
    prompt_ner,
):
    # Query the model `call_groq_times.value` times and pool every parsed
    # entity record into `result`. Nothing is sent unless the checkbox is
    # ticked.
    result = []
    # Bind `answer` up front so it is defined even when no call is made or
    # every call fails; otherwise the return below hits an unbound name.
    answer = None
    _failed = 0
    _last_error = None
    if checkbox_ner.value:
        for _ in mo.status.progress_bar(
            range(call_groq_times.value),
            title='In Progress …',
            completion_title='Finished.',
        ):
            try:
                answer = chat_completion(client, prompt_ner, model_ner.value)
                result += extract_ner_from_assistant(answer)
            except Exception as _error:
                # Still best effort: one failed request or unparsable reply
                # must not abort the remaining calls. Remember it so the
                # failure is reported instead of silently discarded.
                _failed += 1
                _last_error = _error
        if _failed:
            print(
                f"{_failed} of {call_groq_times.value} groq calls failed; "
                f"last error: {_last_error!r}"
            )
    return answer, result
@app.cell
def _(mo, pd, result):
    # Turn the pooled entity records into an interactive dataframe widget.
    data = pd.DataFrame.from_dict(result)
    # `df` names the frame that is actually displayed. Bind it before the
    # try so it is defined on the success path too; previously it only
    # existed after the fallback ran, leaving the return below with an
    # unbound name.
    df = data
    try:
        transformed_df = mo.ui.dataframe(df)
    except Exception:
        # Fallback: show the contents of 'data.csv' from the working
        # directory instead.
        # NOTE(review): presumably a snapshot saved earlier with
        # data.to_csv('data.csv') - confirm. This still raises if the file
        # is missing.
        df = pd.read_csv('data.csv')
        transformed_df = mo.ui.dataframe(df)
    return data, df, transformed_df
@app.cell
def _(mo, transformed_df):
    # Caption followed by the interactive results table; the stacked layout
    # is this cell's displayed output.
    _caption = mo.md(
        r"""
The results are shown below, use __+ Add__ to apply different transforms and explore more:
------
"""
    )
    mo.vstack([_caption, transformed_df])
    return
@app.cell
async def _():
    # Shared imports for the other cells.
    import os
    import pandas as pd
    from functools import reduce

    # micropip is Pyodide's in-browser package installer and is normally
    # absent from a regular CPython environment. Import it only when it is
    # available so the notebook also runs natively, where `groq` is expected
    # to be installed already (see the script header's dependency list).
    try:
        import micropip
    except ImportError:
        micropip = None
    if micropip is not None:
        await micropip.install("ssl")
        await micropip.install("groq")
    from groq import Groq

    return Groq, micropip, os, pd, reduce
@app.cell
def _():
    # Model identifiers offered in the "Choose a LLM" dropdown.
    models = [
        "llama3-8b-8192",
        "llama3-70b-8192",
        "llama2-70b-4096",
        "mixtral-8x7b-32768",
        "gemma-7b-it",
    ]
    return (models,)
@app.cell
def _(Groq, groq_api_key):
    # Build the API client from whatever key is currently in the input box.
    _key = groq_api_key.value
    client = Groq(api_key=_key)
    return (client,)
@app.cell
def _():
    # Defaults used by the prompt when the user leaves the text or tag
    # inputs empty.
    default_sentence = "The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours."
    # "Route" and "Strength" are separate list items. The original wrote
    # `"Route" and "Strength"`, a boolean expression that evaluates to just
    # "Strength" and so dropped the "Route" tag.
    default_tags = [
        "ADE",
        "Dosage",
        "Drug",
        "Duration",
        "Form",
        "Frequency",
        "Reason",
        "Route",
        "Strength",
    ]
    return default_sentence, default_tags
@app.cell
def _(default_sentence, default_tags, ner_tags, ner_text):
    # Assemble the few-shot prompt from the user's inputs, falling back to
    # the defaults for anything left empty.
    #
    # Tags are comma-separated. Strip surrounding whitespace and drop blank
    # pieces so "Country, Person" yields ['Country', 'Person'] rather than
    # ['Country', ' Person']. Input with no usable tag falls back to the
    # defaults.
    _custom_tags = [
        _tag.strip() for _tag in ner_tags.value.split(',') if _tag.strip()
    ]
    _tags = _custom_tags or default_tags
    _text = ner_text.value or default_sentence
    prompt_ner = fr"""Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.
The available entities are: {_tags}.
For example:
Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .
Named Entities: <start> Lipitor (Drug: 0.87), Tylenol (Drug: 0.59) <end>
Here is your task:
Text: {_text}
Named Entities:
Remember to answer in the exact form of the example.
"""
    # Display the final prompt as this cell's output.
    prompt_ner
    return (prompt_ner,)
@app.cell
def _():
    def chat_completion(client, prompt, model):
        """Send one NER prompt to a chat model and return the reply text.

        Args:
            client: Object exposing ``chat.completions.create`` (the
                notebook passes its Groq client).
            prompt: Content of the user message.
            model: Model identifier, forwarded unchanged.

        Returns:
            The ``message.content`` of the first choice in the response.
        """
        system_message = {
            "role": "system",
            "content": "you will help me with some NER tasks.",
        }
        # The user message is what the assistant responds to.
        user_message = {"role": "user", "content": prompt}
        response = client.chat.completions.create(
            messages=[system_message, user_message],
            model=model,
            temperature=0.5,
            max_tokens=100,
            top_p=1,
            # '<end>' is the closing marker used in the prompt's example.
            stop='<end>',
            # Request the whole reply at once instead of streamed deltas.
            stream=False,
        )
        return response.choices[0].message.content

    return (chat_completion,)
@app.cell
def _(reduce):
    # `reduce` stays in the signature so the cell's interface is unchanged;
    # the parser below no longer needs it.
    def extract_ner_from_assistant(answer: str) -> list[dict]:
        """Parse a model reply into entity records.

        The reply is expected to look like
        ``... <start> oral sores (ADE: 0.98), rash (ADE: 0.98)``; an
        optional ``<end>`` token closes the list.

        Args:
            answer: Raw reply text from the model.

        Returns:
            One dict per entity with keys ``"named entity"``, ``"tag"`` and
            ``"score"`` (a float). Empty when the reply has no ``<start>``
            token or nothing follows it.

        Raises:
            IndexError: A non-blank record has fewer than two tokens.
            ValueError: A record's last token is not ``<number>)``.
        """
        tokens = answer.split()
        if "<start>" not in tokens:
            return []
        # Keep only what lies between <start> and an optional <end>.
        tokens = tokens[tokens.index("<start>") + 1:]
        if "<end>" in tokens:
            tokens = tokens[:tokens.index("<end>")]

        entities = []
        # e.g. ['oral sores (ADE: 0.98)', ' rash (ADE: 0.98)']
        for record in " ".join(tokens).split(","):
            parts = record.split()
            if not parts:
                # Blank piece, e.g. after a trailing comma.
                continue
            entities.append(
                {
                    "named entity": " ".join(parts[:-2]),
                    # "(ADE:" -> "ADE"
                    "tag": parts[-2][1:-1],
                    # "0.98)" -> 0.98
                    "score": float(parts[-1][:-1]),
                }
            )
        return entities

    return (extract_ner_from_assistant,)
if __name__ == "__main__":
    # Run the notebook's cells when the file is executed directly.
    app.run()