# Spaces: Runtime error
import pandas as pd | |
import streamlit as st | |
from keybert import KeyBERT | |
import yake | |
from keyphrase_vectorizers import KeyphraseCountVectorizer | |
# Streamlit re-executes this script from top to bottom on every user
# interaction, so the model must be cached or it is rebuilt on each rerun.
# ``st.cache_resource`` is the current API; older Streamlit releases only
# provide ``st.cache`` (with ``allow_output_mutation=True`` for such objects).
_cache_model = getattr(st, "cache_resource", None)
if _cache_model is None:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_model(model_name="AI-Growth-Lab/PatentSBERTa"):
    """Build the KeyBERT keyword extractor, cached across Streamlit reruns.

    Args:
        model_name: Embedding model identifier handed to ``KeyBERT``.

    Returns:
        The ``KeyBERT`` instance created for ``model_name``.
    """
    return KeyBERT(model_name)


model = load_model()
def predict_fn(text, model, top_n=50):
    """Extract ranked keyphrases from ``text``.

    YAKE proposes up to ``top_n`` candidate phrases; those candidates are then
    handed to ``model.extract_keywords`` so that only they are scored.

    Args:
        text: Document to analyse.
        model: Object exposing KeyBERT's ``extract_keywords`` method.
        top_n: Maximum number of YAKE candidates and of returned keywords.

    Returns:
        The result of ``model.extract_keywords`` (the caller in this file
        treats it as ``(keyword, score)`` pairs), or an empty list when the
        text is blank or YAKE yields no candidates.
    """
    # Nothing to analyse: avoid handing an empty document to the extractors.
    if not text or not text.strip():
        return []

    kw_extractor = yake.KeywordExtractor(top=top_n)
    candidates = kw_extractor.extract_keywords(text)

    # scikit-learn's CountVectorizer (KeyBERT's default vectorizer) lower-cases
    # documents by default, so candidates containing capitals would never be
    # matched; it also rejects duplicate vocabulary terms, hence the
    # order-preserving de-duplication after lower-casing.
    keyphrases = list(
        dict.fromkeys(candidate[0].lower() for candidate in candidates)
    )

    # An empty candidate vocabulary makes the vectorizer raise ValueError.
    if not keyphrases:
        return []

    return model.extract_keywords(
        text,
        keyphrases,
        keyphrase_ngram_range=(1, 3),
        top_n=top_n,
    )
# --- Page layout -----------------------------------------------------------
st.title("Patent Text Extractor")
placeholder = st.empty()
text = placeholder.text_area("Paste or write text", height=300)
button = st.button("Extract Keywords")

# TODO: sidebar controls (top_n, n-gram range, Max Sum / MMR diversity) were
# sketched here as commented-out code but never wired into the extraction.

# --- Extraction ------------------------------------------------------------
# Streamlit re-runs the whole script on every interaction, so the (expensive)
# extraction is only performed when the button was actually clicked, and only
# when there is some text to work on.
keywords = []
if button:
    if not text or not text.strip():
        st.warning("Please paste or write some text first.")
    else:
        keywords = predict_fn(text, model)
        if not keywords:
            st.warning("No keywords could be extracted from this text.")

if keywords:
    st.info("Extracted keywords")
    keywords = pd.DataFrame(keywords, columns=["Keyword", "Score"])
    st.table(keywords)