Spaces:

som11
/

named_entity_recognition

Running

File size: 3,625 Bytes

5e37eca

import streamlit as st 
import spacy
from spacy import displacy
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import pandas as pd


# Page header: the app title followed by a one-sentence description of NER.
st.title('Named Entity Recognizer')
st.write('Named Entity Recognition (NER) is like a smart highlighter that scans through text and highlights important words, such as people’s names, places, companies, and dates.')

# Empty write kept between the description and the form.
st.write('')

# Entries offered in the model dropdown. The first entry is a placeholder:
# the submit handling further down rejects it with an error.
MODEL_CHOICES = (
    'Choose a model',
    "Spacy's en_core_web_sm model",
    'dslim/bert-base-NER model',
)

# One form holding the text box, the model dropdown and the Submit button.
# The widgets are attached through the form object rather than a `with` block.
ner_form = st.form(key='form_named_entity_recognition')
input_from_user = ner_form.text_area('enter your input')
model_options = ner_form.selectbox('choose a model', MODEL_CHOICES)
submit_button = ner_form.form_submit_button('Submit')


# Descriptions shown next to each tag produced by the dslim/bert-base-NER
# pipeline. Tags missing from this mapping are displayed with a None
# description (dict.get default).
_BERT_LABEL_DESCRIPTIONS = {
    "O": "Outside of a named entity",
    "B-MISC": "Beginning of a miscellaneous entity right after another miscellaneous entity",
    "I-MISC": "Miscellaneous entity",
    "B-PER": "Beginning of a person’s name right after another person’s name",
    "I-PER": "Person’s name",
    "B-ORG": "Beginning of an organization right after another organization",
    "I-ORG": "Organization",
    "B-LOC": "Beginning of a location right after another location",
    "I-LOC": "Location",
}


def _spacy_entity_rows(text: str) -> tuple:
    """Run spaCy's en_core_web_sm model on *text*.

    Returns a ``(doc, rows)`` pair: the processed spaCy document (needed later
    for the displaCy rendering) and one table row per detected entity.
    """
    # NOTE(review): the model is loaded again on every submission; consider
    # caching it if the deployed Streamlit version offers a resource cache.
    doc = spacy.load('en_core_web_sm')(text)
    rows = [
        {
            'Entity': ent.text,
            'Label of the Entity': ent.label_,
            'Description of the Label': spacy.explain(ent.label_),
        }
        for ent in doc.ents
    ]
    return doc, rows


def _bert_entity_rows(text: str) -> list:
    """Run the dslim/bert-base-NER pipeline on *text*; return one row per hit."""
    # NOTE(review): tokenizer and model are re-created on every submission,
    # same caching remark as for the spaCy model.
    tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
    model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
    tagger = pipeline('ner', model=model, tokenizer=tokenizer)
    return [
        {
            # Slice the original text so the table shows the exact characters
            # covered by the prediction's start/end offsets.
            'Entity': text[hit['start']:hit['end']],
            'Label of the Entity': hit['entity'],
            'Description of the Label': _BERT_LABEL_DESCRIPTIONS.get(hit['entity']),
        }
        for hit in tagger(text)
    ]


def _show_entity_table(text: str, rows: list) -> bool:
    """Echo the analysed text and render *rows* as a table.

    Returns True when at least one entity was displayed. When *rows* is empty
    an informational message is shown instead of an empty table and False is
    returned.
    """
    st.write('Analysis of the detected entities from the text ==>')
    st.markdown(f'**{text}**')
    if not rows:
        st.info('No named entities were detected in the text.')
        return False
    st.table(pd.DataFrame(rows))
    return True


# Handle a form submission: validate the inputs, then run the selected model.
if submit_button:

    if not input_from_user.strip():
        # Whitespace-only input is treated the same as an empty form.
        st.error('empty form submitted')

    elif model_options == 'Choose a model':
        # The placeholder entry of the dropdown is still selected.
        st.error('Please choose a model for named entity recognition')

    else:
        st.subheader('Result Analysis')
        st.write('Model Used for Named Entity Recognition:')
        st.success(model_options)

        if model_options == 'Spacy\'s en_core_web_sm model':
            doc, rows = _spacy_entity_rows(input_from_user)
            # Only render the highlighted text when there is something to
            # highlight.
            if _show_entity_table(input_from_user, rows):
                st.write('Entities marked in the input text:')
                st.markdown(displacy.render(doc, style='ent'), unsafe_allow_html=True)

        elif model_options == 'dslim/bert-base-NER model':
            _show_entity_table(input_from_user, _bert_entity_rows(input_from_user))