# Hugging Face Space: paragon-analytics / ResText (status: Sleeping)
# File size: 5,048 Bytes

# Import packages:

import numpy as np
import matplotlib.pyplot as plt
import re
# tensorflow imports:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import losses
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.optimizers import RMSprop
# # keras imports:
from keras.models import Model
from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding, RepeatVector, TimeDistributed
from keras.preprocessing.text import Tokenizer
from keras_preprocessing import sequence
from tensorflow.keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras import layers
from keras.backend import clear_session
import pickle
import gradio as gr
import yake
import spacy
from spacy import displacy
import streamlit as st
import spacy_streamlit
# spaCy English pipeline; used further down to find the entities that
# displaCy renders for the NER output.
nlp = spacy.load('en_core_web_sm')
import torch
import tensorflow as tf
from transformers import RobertaTokenizer, RobertaModel
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
# Tokenizer and sequence-classification model are fetched at import time.
# NOTE(review): the tokenizer and the model come from different repositories;
# presumably the classifier was fine-tuned from this RoBERTa checkpoint, so
# the vocabularies match -- confirm.
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("paragon-analytics/bert_resil")

# YAKE keyword extractors: `kw_extractor` uses the library defaults, while
# `custom_kw_extractor` is configured explicitly (English, n=2, a 0.2
# de-duplication limit, ten keywords at most).
kw_extractor = yake.KeywordExtractor()
custom_kw_extractor = yake.KeywordExtractor(lan="en", n=2, dedupLim=0.2, top=10, features=None)

# Vocabulary-size / sequence-length settings for the pickled LSTM pipeline.
# NOTE(review): presumably the values used at training time -- confirm.
max_words = 2000
max_len = 111

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only ship artefacts produced by a trusted training run alongside this app.

# Load the LSTM model from disk; the context manager closes the file handle
# as soon as the object has been read.
filename = 'resil_lstm_model.sav'
with open(filename, 'rb') as model_file:
    lmodel = pickle.load(model_file)

# Load the fitted text tokenizer from disk.
filename = 'tokenizer.pickle'
with open(filename, 'rb') as tokenizer_file:
    tok = pickle.load(tokenizer_file)

def _keyword_strength(yake_score):
    """Map a YAKE keyword score onto the highlight label shown in the UI."""
    # NOTE(review): YAKE documents lower scores as *more* relevant, so "+++"
    # is currently given to the least relevant candidates -- confirm that
    # this ordering is intended before changing the thresholds.
    if yake_score > 0.4:
        return "+++"
    if yake_score > 0.1:
        return "++"
    if yake_score > 0.01:
        return "+"
    return "NA"


def process_final_text(text, max_tokens=512):
    """Classify *text* and extract its keywords and named entities.

    Args:
        text: Input to analyse; it is coerced to ``str`` before use.
        max_tokens: Upper bound on the number of tokens passed to the
            transformer classifier. Longer inputs are truncated instead of
            overflowing the model's position embeddings. 512 is the usual
            limit for BERT/RoBERTa-base checkpoints -- TODO confirm against
            the fine-tuned model's config.

    Returns:
        A 3-tuple ``(label_scores, keywords, ner_html)``:

        * ``label_scores`` -- dict with the softmax probability of class
          index 1 under ``"Resilience"`` and of class index 0 under
          ``"Non-Resilience"``.
        * ``keywords`` -- list of ``(keyword, strength)`` pairs, where
          ``strength`` is one of ``"+++"``, ``"++"``, ``"+"`` or ``"NA"``.
        * ``ner_html`` -- displaCy entity rendering of the text, as a
          complete HTML page.

    Note:
        Only the transformer classifier contributes to the returned scores.
    """
    raw_text = str(text)
    lowered = raw_text.lower()

    # Transformer classification. Gradients are never needed for inference,
    # so the forward pass runs under no_grad to avoid building a graph.
    encoded_input = tokenizer(
        lowered,
        return_tensors='pt',
        truncation=True,
        max_length=max_tokens,
    )
    with torch.no_grad():
        output = model(**encoded_input)
    # output[0] holds the logits; [0] selects the single item in the batch.
    probabilities = tf.nn.softmax(output[0][0].numpy()).numpy()

    # Keywords (extracted from the lower-cased text) with a strength label.
    keywords = [
        (phrase, _keyword_strength(yake_score))
        for phrase, yake_score in custom_kw_extractor.extract_keywords(lowered)
    ]

    # Named entities are detected on the text in its original casing.
    doc = nlp(raw_text)
    ner_html = displacy.render(doc, style="ent", page=True, jupyter=False)

    label_scores = {
        "Resilience": float(probabilities[1]),
        "Non-Resilience": float(probabilities[0]),
    }
    return label_scores, keywords, ner_html
    
def main(prob1):
    """Gradio callback: analyse the submitted text.

    ``prob1`` is converted to ``str`` and handed to ``process_final_text``;
    the first three items of its result are returned as a tuple, matching
    the three output components the UI wires this callback to.
    """
    analysis = process_final_text(str(prob1))
    return tuple(analysis[position] for position in range(3))
    
# Page title; passed to gr.Blocks(title=...) and also rendered as the
# Markdown heading at the top of the page.
title = "Welcome to **ResText** 🪐"
# Introductory blurb rendered as Markdown directly below the heading.
description1 = """
This app takes text (up to a few sentences) and predicts whether the text contains resilience messaging. Resilience messaging is a text message that is about being able to a) "adapt to change” and b) “bounce back after illness or hardship". The predictive model is a fine-tuned RoBERTa NLP model. Just add your text and hit Create & Analyze. Or, simply click on one of the examples to see how it works. ✨   
"""

# Colours for the keyword-strength labels shown in the highlighted text.
highlight_colors = {
    "+++": "royalblue",
    "++": "cornflowerblue",
    "+": "lightsteelblue",
    "NA": "white",
}

# One-click sample inputs; each inner list supplies the single text input.
example_inputs = [
    ["Please stay at home and avoid unnecessary trips."],
    ["Please stay at home and avoid unnecessary trips. We will survive this."],
    ["We will survive this."],
    ["Watch today’s news briefing with the latest updates on COVID-19 in Connecticut."],
    ["So let's keep doing what we know works. Let's stay strong, and let's beat this virus. I know we can, and I know we can come out stronger on the other side."],
]

# Build the page: heading and description, the text input with its button,
# the three result widgets, and finally the clickable examples.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description1)
    gr.Markdown("---")
    prob1 = gr.Textbox(
        label="Enter Your Text Here:",
        lines=2,
        placeholder="Type it here ...",
    )
    submit_btn = gr.Button("Create & Analyze")

    with gr.Column(visible=True) as output_col:
        label = gr.Label(label="Predicted Label")
        impplot = gr.HighlightedText(
            label="Important Words",
            combine_adjacent=False,
        ).style(color_map=highlight_colors)
        NER = gr.HTML(label='NER:')

    # The button runs `main` on the textbox content and fills all three
    # outputs; the same call is exposed under the "ResText" API name.
    submit_btn.click(
        fn=main,
        inputs=[prob1],
        outputs=[label, impplot, NER],
        api_name="ResText",
    )

    gr.Markdown("### Click on any of the examples below to see how it works:")
    gr.Examples(
        examples=example_inputs,
        inputs=[prob1],
        outputs=[label, impplot, NER],
        fn=main,
        cache_examples=True,
    )

demo.launch()