"""Gradio demo app: convert English number words to integers and run sentiment analysis."""
import inspect
import json
import logging
import os
from typing import List, Type

import gradio as gr
import spacy  # noqa
from dotenv import load_dotenv
from gradio import routes
from transformers import pipeline

load_dotenv()

# Sentinel returned (as a string) by the try_* wrappers when parsing fails.
TOKENS2INT_ERROR_INT = 32202

log = logging.getLogger()

ONES = [
    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
    "sixteen", "seventeen", "eighteen", "nineteen",
]

# token_mapping = json.load(open('str_mapping.json'))

# Per-character replacements applied by replace_chars(): separators become
# spaces and digits become space-padded number words.
# replace_chars() looks up one character at a time, so only the
# single-character keys ("-", "_", "0".."9") can ever match; the "10".."19"
# keys are inert.
CHAR_MAPPING = {
    "-": " ",
    "_": " ",
}
CHAR_MAPPING.update((str(i), word) for i, word in enumerate([" " + s + " " for s in ONES]))

# NOTE(review): the keys here are ints (0..19) while replace_tokens() looks up
# string tokens, so this mapping never matches text input as written.
# Left unchanged because the intended key type is not evident from this file.
TOKEN_MAPPING = dict(enumerate([" " + s + " " for s in ONES]))

# Raises KeyError at import time if BQ_JSON is not set (after load_dotenv()).
BQ_JSON = os.environ['BQ_JSON']


def tokenize(text):
    """Split *text* on whitespace into a list of tokens."""
    return text.split()


def detokenize(tokens):
    """Join *tokens* back into a single space-separated string."""
    return ' '.join(tokens)


def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
    """Return *tokens* with each token found in *token_mapping* replaced by its value."""
    return [token_mapping.get(tok, tok) for tok in tokens]


def replace_chars(text, char_mapping=CHAR_MAPPING):
    """Return *text* with each character found in *char_mapping* replaced by its value."""
    return ''.join(char_mapping.get(c, c) for c in text)


def _build_numwords():
    """Build the word -> (scale, increment) table used by tokens2int()."""
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
    scales = ["hundred", "thousand", "million", "billion", "trillion"]

    numwords = {"and": (1, 0)}
    for idx, word in enumerate(ONES):
        numwords[word] = (1, idx)
    for idx, word in enumerate(tens):
        # The two leading "" entries only pad the list so that idx * 10 lines
        # up; registering them would make an empty token parse as 10.
        if word:
            numwords[word] = (1, idx * 10)
    for idx, word in enumerate(scales):
        # "hundred" (idx 0) is 10**2; the rest are 10**3, 10**6, 10**9, ...
        numwords[word] = (10 ** (idx * 3 or 2), 0)
    return numwords


# Built once at import instead of being cached in a mutable default argument.
_NUMWORDS = _build_numwords()


def tokens2int(tokens, numwords=None):
    """Convert a sequence of English number-word tokens to a decimal string.

    Args:
        tokens: iterable of lowercase number words, e.g. ``["forty", "two"]``.
        numwords: optional mapping ``word -> (scale, increment)``; when omitted
            or empty, the built-in English table is used.

    Returns:
        The parsed value as a ``str`` (not an ``int``).

    Raises:
        ValueError: if a token is not present in *numwords*.

    >>> tokens2int(["nine"])
    '9'
    >>> tokens2int(["forty", "two"])
    '42'
    >>> tokens2int(["one", "thousand", "forty", "seven"])
    '1047'
    """
    if not numwords:
        numwords = _NUMWORDS

    current = result = 0
    for word in tokens:
        if word not in numwords:
            raise ValueError(f"Illegal word: {word}")
        scale, increment = numwords[word]
        current = current * scale + increment
        # Scales above "hundred" close the current group (thousands, millions, ...).
        if scale > 100:
            result += current
            current = 0
    return str(result + current)


def text2int(text):
    """Parse number words in *text* and return the value as a string.

    Digits are first rewritten to words by replace_chars(), and consecutive
    unit words are added together, so ``text2int("1 2 three")`` gives ``'6'``.
    """
    return tokens2int(tokenize(replace_chars(text)))


def try_text2int(text):
    """Like text2int(), but log failures and return the error sentinel instead of raising.

    Returns:
        The parsed value as a string, or ``str(TOKENS2INT_ERROR_INT)`` on error.
    """
    text = str(text)
    try:
        # Parse exactly once: tokens2int() returns a digit string, and feeding
        # that string back into tokens2int() fails on every character.
        intstr = tokens2int(tokenize(replace_chars(text)))
    except Exception as e:
        log.error(str(e))
        log.error(f'User input: {text}')
        intstr = TOKENS2INT_ERROR_INT
    return str(intstr)


def try_text2int_preprocessed(text):
    """Variant of try_text2int() that also runs replace_tokens() before parsing.

    Returns:
        The parsed value as a string, or ``str(TOKENS2INT_ERROR_INT)`` on error.
    """
    text = str(text)
    try:
        tokens = replace_tokens(tokenize(replace_chars(text)))
    except Exception as e:
        # Fall back to a plain whitespace split if preprocessing fails.
        log.error(str(e))
        tokens = text.split()
    try:
        intstr = tokens2int(tokens)
    except Exception as e:
        log.error(str(e))
        intstr = str(TOKENS2INT_ERROR_INT)
    return intstr


def get_types(cls_set: List[Type], component: str):
    """Extract description and type strings from component class docstrings.

    For ``component == "input"`` the second docstring line is parsed; for any
    other value the last line is parsed. From that line, the text after the
    final ``:`` is the description and the text inside the first ``(...)`` is
    the type.

    Returns:
        A ``(docset, types)`` pair of lists, one entry per class in *cls_set*.
    """
    line_idx = 1 if component == "input" else -1
    docset = []
    types = []
    for cls in cls_set:
        line = inspect.getdoc(cls).split("\n")[line_idx]
        docset.append(line.split(":")[-1])
        types.append(line.split(")")[0].split("(")[-1])
    return docset, types


# Override gradio's docstring parser with the version above.
routes.get_types = get_types

sentiment = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")


def get_sentiment(text):
    """Run the sentiment-analysis pipeline on *text* and return its raw result."""
    return sentiment(text)


# Parsed once and appended to each tab's API help text.
# NOTE(review): the literal "{bq_json}" in the Markdown below is never
# interpolated (the strings are raw, not f-strings), so it is displayed as-is.
bq_type_suffix = f"{json.loads(BQ_JSON)['type']}"

with gr.Blocks() as html_block:
    gr.Markdown("# Rori - Mathbot")

    with gr.Tab("Text to integer"):
        inputs_text2int = [
            gr.Text(placeholder="Type a number as text or a sentence", label="Text to process", value="forty two"),
        ]
        outputs_text2int = gr.Textbox(label="Output integer")
        button_text2int = gr.Button("text2int")
        button_text2int.click(
            fn=try_text2int,
            inputs=inputs_text2int,
            outputs=outputs_text2int,
            api_name="text2int",
        )
        examples_text2int = [
            "one thousand forty seven",
            "one hundred",
        ]
        gr.Examples(examples=examples_text2int, inputs=inputs_text2int)
        gr.Markdown(r"""

## API
```python
import requests

requests.post(
    url="https://tangibleai-mathtext.hf.space/run/text2int", json={"data": ["one hundred forty five"]}
).json()
```

Or using `curl`:

```bash
curl -X POST https://tangibleai-mathtext.hf.space/run/text2int -H 'Content-Type: application/json' -d '{"data": ["one hundred forty five"]}'
```
{bq_json}""" + bq_type_suffix)

    with gr.Tab("Text to integer preprocessed"):
        inputs_text2int_preprocessed = [
            gr.Text(placeholder="Type a number as text or a sentence", label="Text to process", value="forty two"),
        ]
        outputs_text2int_preprocessed = gr.Textbox(label="Output integer")
        button_text2int = gr.Button("text2int preprocessed")
        button_text2int.click(
            fn=try_text2int_preprocessed,
            inputs=inputs_text2int_preprocessed,
            outputs=outputs_text2int_preprocessed,
            api_name="text2int-preprocessed",
        )
        examples_text2int_preprocessed = [
            "one thousand forty seven",
            "one hundred",
        ]
        gr.Examples(examples=examples_text2int_preprocessed, inputs=inputs_text2int_preprocessed)
        gr.Markdown(r"""

## API
```python
import requests

requests.post(
    url="https://tangibleai-mathtext.hf.space/run/text2int-preprocessed", json={"data": ["one hundred forty five"]}
).json()
```

Or using `curl`:

```bash
curl -X POST https://tangibleai-mathtext.hf.space/run/text2int-preprocessed -H 'Content-Type: application/json' -d '{"data": ["one hundred forty five"]}'
```
{bq_json}""" + bq_type_suffix)

    with gr.Tab("Sentiment Analysis"):
        inputs_sentiment = [
            gr.Text(placeholder="Type a number as text or a sentence", label="Text to process",
                    value="I really like it!"),
        ]
        outputs_sentiment = gr.Textbox(label="Sentiment result")
        button_sentiment = gr.Button("sentiment analysis")
        button_sentiment.click(
            get_sentiment,
            inputs=inputs_sentiment,
            outputs=outputs_sentiment,
            api_name="sentiment-analysis"
        )
        examples_sentiment = [
            ["Totally agree!"],
            ["Sorry, I can not accept this!"],
        ]
        gr.Examples(examples=examples_sentiment, inputs=inputs_sentiment)
        gr.Markdown(r"""

## API
```python
import requests

requests.post(
    url="https://tangibleai-mathtext.hf.space/run/sentiment-analysis", json={"data": ["You are right!"]}
).json()
```

Or using `curl`:

```bash
curl -X POST https://tangibleai-mathtext.hf.space/run/sentiment-analysis -H 'Content-Type: application/json' -d '{"data": ["You are right!"]}'
```
{bq_json}""" + bq_type_suffix)

# interface = gr.Interface(lambda x: x, inputs=["text"], outputs=["text"])
# html_block.input_components = interface.input_components
# html_block.output_components = interface.output_components
# html_block.examples = None
html_block.predict_durations = []

html_block.launch()