# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "groq==0.18.0",
#     "pandas==2.2.3",
#     "marimo",
# ]
# ///

import marimo

__generated_with = "0.11.6"
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo
    return (mo,)


@app.cell
def _(mo):
    # Password-style text box so the key is not echoed in the UI.
    groq_api_key = mo.ui.text(label='Enter your groq api key here', kind='password')
    groq_api_key
    return (groq_api_key,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        # LLM for NER
        - do a [Few-shot prompting](https://huggingface.co/docs/transformers/main/en/tasks/prompting#few-shot-prompting) and repeat multiple times.

        The default prompt is:

        ```python
        Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.
        The available entities are: "ADE", "Dosage", "Drug", "Duration", "Form", "Frequency", "Reason", "Route" and "Strength".

        For example:

        Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .
        Named Entities: Lipitor (Drug: 0.87), Tylenol (Drug: 0.59)

        Here is your task:

        Text: The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours.
        Named Entities:

        Remember to answer in the exact form of the example.
        ```
        """
    )
    return


@app.cell
def _(mo, models):
    # Experiment controls. Slider positional arguments are start, stop, step, value.
    call_groq_times = mo.ui.slider(10, 100, 10, 30, label='How many times do you want to call groq')
    model_ner = mo.ui.dropdown(options=models, value="llama3-8b-8192", label="Choose a LLM")
    ner_text = mo.ui.text_area(value="", label='Type your text here or leave it to default:')
    ner_tags = mo.ui.text_area(
        value="",
        label="Type the ner tags here or leave it to default:",
        placeholder='e.g. Country, Person',
    )
    # The API is only called while this box is ticked (see the calling cell).
    checkbox_ner = mo.ui.checkbox(label=' Whether to call groq api')
    mo.vstack(
        [
            mo.md("# Experiment"),
            mo.hstack([call_groq_times, model_ner]),
            mo.hstack([ner_text, ner_tags]),
            checkbox_ner,
        ],
        align='center',
    )
    return call_groq_times, checkbox_ner, model_ner, ner_tags, ner_text


@app.cell
def _(
    call_groq_times,
    chat_completion,
    checkbox_ner,
    client,
    extract_ner_from_assistant,
    mo,
    model_ner,
    prompt_ner,
):
    # Call groq repeatedly with the same prompt and pool every parsed record.
    result = []
    # Bind `answer` up front so it exists even when no call is made.
    answer = None
    _failed = 0
    if checkbox_ner.value:
        for _ in mo.status.progress_bar(
            range(call_groq_times.value),
            title='In Progress …',
            completion_title='Finished.',
        ):
            try:
                answer = chat_completion(client, prompt_ner, model_ner.value)
                result += extract_ner_from_assistant(answer)
            except Exception:
                # Best effort: one failed call must not abort the whole run,
                # but failures are counted so they are no longer invisible.
                _failed += 1
        if _failed:
            print(f"{_failed} of {call_groq_times.value} groq calls failed; their results were skipped.")
    return answer, result


@app.cell
def _(mo, pd, result):
    # Turn the pooled records into an interactive dataframe.
    data = pd.DataFrame.from_dict(result)
    # Bind `df` up front so it exists even when the fallback is not taken.
    df = None
    try:
        transformed_df = mo.ui.dataframe(data)
    except Exception:
        # Fallback to a saved snapshot when `data` cannot be displayed.
        # NOTE(review): presumably 'data.csv' was exported earlier with
        # data.to_csv('data.csv') - confirm the file ships with the notebook.
        df = pd.read_csv('data.csv')
        transformed_df = mo.ui.dataframe(df)
    return data, df, transformed_df


@app.cell
def _(mo, transformed_df):
    _md = mo.md(
        r"""
        The results are shown below, use __+ Add__ to apply different transforms and explore more:

        ------
        """
    )
    mo.vstack([_md, transformed_df])
    return


@app.cell
async def _():
    import os
    import pandas as pd
    from functools import reduce

    # Runtime package installation through micropip.
    # NOTE(review): micropip is presumably only importable under Pyodide/WASM;
    # confirm whether this notebook is also meant to run on a native interpreter.
    import micropip
    await micropip.install("ssl")
    await micropip.install("groq")
    from groq import Groq
    return Groq, micropip, os, pd, reduce


@app.cell
def _():
    # available models on groq
    models = [
        "llama3-8b-8192",
        "llama3-70b-8192",
        "llama2-70b-4096",
        "mixtral-8x7b-32768",
        "gemma-7b-it",
    ]
    return (models,)


@app.cell
def _(Groq, groq_api_key):
    client = Groq(api_key=groq_api_key.value)
    return (client,)


@app.cell
def _():
    default_sentence = "The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours."
    # Each tag is its own list element. The previous `"Route" and "Strength"`
    # evaluated to just "Strength" and silently dropped "Route".
    default_tags = ["ADE", "Dosage", "Drug", "Duration", "Form", "Frequency", "Reason", "Route", "Strength"]
    return default_sentence, default_tags


@app.cell
def _(default_sentence, default_tags, ner_tags, ner_text):
    # Few-shot prompt. User-supplied tags/text override the defaults when given;
    # user tags are split on commas and stripped of surrounding whitespace.
    prompt_ner = fr"""Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.
    The available entities are: {[tag.strip() for tag in ner_tags.value.split(',')] if ner_tags.value else default_tags}.

    For example:

    Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .
    Named Entities: Lipitor (Drug: 0.87), Tylenol (Drug: 0.59)

    Here is your task:

    Text: {ner_text.value if ner_text.value else default_sentence}
    Named Entities:

    Remember to answer in the exact form of the example.
    """
    prompt_ner
    return (prompt_ner,)


@app.cell
def _():
    def chat_completion(client, prompt, model):
        """Send `prompt` to `model` through `client` and return the reply text.

        Args:
            client: object exposing `chat.completions.create` (the Groq client
                built in this notebook).
            prompt: user message content.
            model: model identifier passed through to the API.

        Returns:
            The `message.content` of the first choice in the completion.
        """
        completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "you will help me with some NER tasks."
                },
                # set a user message for the assistant to respond to.
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            # The language model which will generate the completion.
            model=model,
            temperature=0.5,
            max_tokens=100,
            top_p=1,
            # NOTE(review): an empty stop sequence looks unintended - confirm
            # whether this should be None or a specific stop token.
            stop='',
            # If set, partial message deltas will be sent.
            stream=False,
        )
        answer = completion.choices[0].message.content
        return answer
    return (chat_completion,)


@app.cell
def _():
    def extract_ner_from_assistant(answer: str) -> list[dict]:
        """Parse `entity (Tag: score)` records out of the assistant's reply.

        The expected reply follows the prompt's example, e.g.
        ``oral sores (ADE: 0.98), rash (ADE: 0.98)``, optionally preceded by a
        label such as ``Named Entities:``.

        Args:
            answer: raw reply text; may be empty or None.

        Returns:
            One dict per record with the keys "named entity" (str),
            "tag" (str) and "score" (float). Text that does not match the
            record shape (including a truncated final record) is ignored, so
            an unparsable reply yields an empty list.
        """
        # Imported locally so the cell does not export an extra global name.
        import re

        if not answer:
            return []

        # entity: anything up to "(" without commas, parentheses, colons or
        #         newlines (excluding ":" also skips a "Named Entities:" label)
        # tag:    text between "(" and ":"
        # score:  a plain decimal number before ")"
        record_pattern = re.compile(
            r"([^,():\n]+?)\s*\(\s*([^():]+?)\s*:\s*(\d+(?:\.\d+)?|\.\d+)\s*\)"
        )

        records = []
        for match in record_pattern.finditer(answer):
            entity, tag, score = match.groups()
            records.append(
                {
                    "named entity": entity.strip().strip("\"'"),
                    "tag": tag.strip().strip("\"'"),
                    "score": float(score),
                }
            )
        return records
    return (extract_ner_from_assistant,)


if __name__ == "__main__":
    app.run()