# Spaces: fukufuk / InformationLiteracySupportTool (Sleeping)
# File size: 8,963 Bytes

import html
import os
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup as bs
from openai import OpenAI

from lib import (fact_opinion_classifer, fact_summarizer, get_data_info,
                 get_raw_data_html, get_raw_data_info,
                 opinion_reason_classifer)

# OpenAI client used by main(); the API key is read from the
# OPENAI_API_KEY environment variable (None when it is unset).
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))


# Upper bound (seconds) for fetching the article so a stalled server cannot
# hang the request handler forever.
_REQUEST_TIMEOUT_SECONDS = 10


def _highlight_sentences(text, sentences, labels, emphasized_label):
    """Return *text* as HTML with every classified sentence wrapped in markup.

    The text is walked once from left to right and each sentence is located
    starting at the end of the previous one.  Compared with a global
    ``str.replace`` per sentence this guarantees that a repeated sentence is
    wrapped exactly once per occurrence and that a sentence can never match
    inside markup that was inserted earlier.

    Args:
        text: The plain article text the sentences were split from.
        sentences: Sentences in the order they occur in ``text``.
        labels: One label per sentence ("Fact" / "Opinion" / anything else).
        emphasized_label: The label whose sentences are rendered in bold; the
            other known label is rendered gray, unknown labels stay unstyled.

    Returns:
        The HTML fragment.  All article text is HTML-escaped; newlines of
        ``text`` are kept so the caller can turn them into paragraphs.
    """
    known_labels = ("Fact", "Opinion")
    pieces = []
    cursor = 0
    for sentence, label in zip(sentences, labels):
        start = text.find(sentence, cursor)
        if start == -1:
            # Should not happen for sentences derived from ``text``; skip
            # rather than corrupt the output.
            continue
        pieces.append(html.escape(text[cursor:start]))
        escaped = html.escape(sentence)
        if label == emphasized_label:
            pieces.append(f'<b>{escaped}</b>')
        elif label in known_labels:
            pieces.append(f'<span style="color: gray;">{escaped}</span>')
        else:
            pieces.append(escaped)
        cursor = start + len(sentence)
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def _as_paragraphs(fragment):
    """Wrap each newline-separated line of *fragment* in its own ``<p>``."""
    return '<p>' + fragment.replace('\n', '</p>\n<p>') + '</p>'


def main(url):
    """Analyse the article at *url* and build the HTML shown in the UI.

    The paragraphs matched by ``#uamods > .article_body > div > p`` are
    joined, split into sentences and classified as fact or opinion.  From
    that the function builds two highlighted views of the text, a list of
    opinions with their interpretation, a summary, and the data views
    produced by the ``lib`` helpers.

    Args:
        url: Address of the article page to download.

    Returns:
        A 6-tuple, in this order: fact view HTML, opinion view HTML,
        opinion/interpretation HTML, the result of ``fact_summarizer``, and
        the second and first element returned by ``get_raw_data_html``.

    Raises:
        requests.RequestException: If the page cannot be fetched in time or
            the server answers with an error status.
        ValueError: If no article text is found, or if the classifier
            returns a different number of labels than there are sentences.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = bs(response.text, 'html.parser')
    article_elems = soup.select('#uamods > .article_body > div > p')
    sentence_text = "\n".join(article_elem.text for article_elem in article_elems)
    stripped_parts = (part.strip() for part in re.split('。|？|\n|!', sentence_text))
    sentences = [part for part in stripped_parts if part]

    if not sentences:
        # Nothing matched the selector: fail early instead of sending empty
        # input through every model call below.
        raise ValueError(f'記事の本文を取得できませんでした。: {url}')

    fact_opinion_list = fact_opinion_classifer(client, sentences)

    if len(sentences) != len(fact_opinion_list):
        raise ValueError(f'GPTの回答の数が一致しませんでした。: {fact_opinion_list}, {sentences}')

    opinions = []
    facts = []
    for sentence, fact_opinion in zip(sentences, fact_opinion_list):
        if fact_opinion == "Opinion":
            opinions.append(sentence)
        elif fact_opinion == "Fact":
            facts.append(sentence)
        else:
            print(f'error: not known fact_opinion option: {fact_opinion}')

    html_format = """<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {message}</div>"""
    fact_main_message = html_format.format(
        message=_as_paragraphs(
            _highlight_sentences(sentence_text, sentences, fact_opinion_list, "Fact")))
    opinion_main_message = html_format.format(
        message=_as_paragraphs(
            _highlight_sentences(sentence_text, sentences, fact_opinion_list, "Opinion")))

    if not opinions:
        opinion_reason_text = "<h3>この文章には意見に関する文が見つかりませんでした。</h3>"
        opinion_reason_html = f"""<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"> {opinion_reason_text}</div>"""
    else:
        opinion_reasons = opinion_reason_classifer(client, opinions)
        opinion_data_html_format = """<li style="list-style: none; border-bottom:1px solid; border-top:1px solid; padding: 6px;"><div><b>{text}</b></div><div><details><summary><b>解釈</b></summary><span style="color: red;">{reason}</span></details></div></li>"""
        # The opinion text comes from the scraped page, so it is escaped.
        # NOTE(review): ``reason`` is inserted as returned by the classifier;
        # confirm whether it may contain markup before escaping it as well.
        opinion_reason_text = "".join(
            opinion_data_html_format.format(text=html.escape(opinion), reason=reason)
            for opinion, reason in zip(opinions, opinion_reasons)
        )
        opinion_reason_html = f"<ul>{opinion_reason_text}</ul>"

    fact_sentence = fact_summarizer(client, sentence_text)

    data_texts = get_data_info(client, facts)
    predicted_data = get_raw_data_info(client, data_texts)
    data_main_message, raw_data_html_txt = get_raw_data_html(sentence_text, data_texts, predicted_data)
    return fact_main_message, opinion_main_message, opinion_reason_html, fact_sentence, raw_data_html_txt, data_main_message


if __name__ == "__main__":
    # Script entry point: assemble the Gradio Blocks UI and launch it.

    # Stylesheet passed to gr.Blocks.
    page_css = '''
        #title_{
            text-align: center;
        }
        #lightblue_img{
            display: inline-block;
            width:15%;
        }
        #lightblue_img img{
            width:100%;
        }
        @media screen and (max-width:480px) {
            #lightblue_img{
                width:50%;
            }
        }
        '''

    # Initial contents of the output panes before the first analysis runs.
    body_placeholder = '<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 450px;"><span style="opacity: 0.5;">こちらに本文が表示されます。</span></div>'
    analysis_placeholder = '<div style="padding: 10px; overflow-x: scroll; border: 1px #999999 solid; height: 400px;"><span style="opacity: 0.5;">こちらに分析内容が表示されます。</span></div>'

    with gr.Blocks(title='情報リテラシーサポートツール',
                   theme='shivi/calm_seafoam',
                   css=page_css) as demo:
        # Header: title, accuracy notice, URL input and submit button.
        with gr.Row():
            with gr.Column():
                gr.HTML(value='''
                <h2 id="title_">情報リテラシーサポートツール</h2>
                ''')
                gr.Markdown(value='### ※意見文の抽出精度は <u>93.3%</u> です。誤っている可能性には十分注意してください。')
                inp = gr.Textbox(label='', placeholder="こちらに記事のURLをコピペしてください。")

                btn = gr.Button(value="Enter")

        # Result tabs: each one shows the article text on the left and the
        # corresponding analysis on the right.
        with gr.Row():
            with gr.Tab('事実'):
                with gr.Row():
                    with gr.Column(scale=1):
                        out_fact_0 = gr.HTML(value=body_placeholder)
                    with gr.Column(scale=1):
                        gr.Markdown(value="## 本文から確実に言えること")
                        out_fact_1 = gr.Markdown(value=analysis_placeholder)
            with gr.Tab('意見'):
                with gr.Row():
                    with gr.Column(scale=1):
                        out_opinion_0 = gr.HTML(value=body_placeholder)
                    with gr.Column(scale=1):
                        gr.Markdown(value="## 意見文分析")
                        out_opinion_1 = gr.HTML(value=analysis_placeholder)
            with gr.Tab('データ'):
                with gr.Row():
                    with gr.Column(scale=1):
                        out_data_0 = gr.HTML(value=body_placeholder)
                    with gr.Column(scale=1):
                        gr.Markdown(value="## 加工データ推測")
                        out_data_1 = gr.HTML(value=analysis_placeholder)

        # Footer: run-time notice, description of the tool, and credits.
        with gr.Row():
            with gr.Column():
                gr.Markdown(value='''
                ⚠︎実行には10~30秒ほどかかります。ご了承ください。
                ***
                ''')

                description_jp = gr.Markdown(value='''
## ニュース記事を読むにあたって総合的な情報リテラシーの不足のサポートを行うツールです

### ※情報リテラシーとは
1. 加工されていない生のデータが何か分かる
2. 事実と意見の区別がつく
3. 文章中から確実に言えることが何か分かる

***
''')

                gr.HTML(value='''
                <h1>本デモについて</h1>
                <fieldset style="border: 1px dashed #000000; padding: 10px;">
                    <legend><h2>作成意図</h2></legend>
                    <p>偽情報や誤情報といった情報は通常の20倍もの速度で拡散されるといいます。このとき拡散する人はどうやら情報リテラシーが低い人が多いそうです。</p>
                    <p>情報リテラシーの改善には教育が急務ですがなかなか教材だけでは追いつかないことがあるかと思います。</p>
                    <p>本デモがそういった皆様の情報リテラシー向上の一助となれば幸いです。</p>
                </fieldset>



                <h2>作成者</h2>
                <img src="https://tegakisozai.com/wp-content/uploads/2021/04/doubutu_penguin.png" width=150px>
                東京大学工学部システム創成学科4年
                <p>堀川祐生</p>
                <p>
                    &#x1f517;
                    <a href= "https://www.linkedin.com/in/祐生-堀川-a0a7a328b/" >LinkedIn</a>
                </p>
                ''')

        # The order matches the tuple returned by main().  Pressing Enter in
        # the textbox and clicking the button trigger the same analysis.
        result_panes = [out_fact_0, out_opinion_0, out_opinion_1,
                        out_fact_1, out_data_1, out_data_0]
        for register in (inp.submit, btn.click):
            register(main, [inp], list(result_panes))

    # To require a login, pass
    # auth=(os.getenv('USER_NAME'), os.getenv('PASSWORD')) to launch().
    demo.launch()