"""Gradio demo that separates facts, opinions and data in a news article."""

import os
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup as bs
from openai import OpenAI

from lib import (fact_opinion_classifer, fact_summarizer, get_data_info,
                 get_raw_data_html, get_raw_data_info,
                 opinion_reason_classifer)

# NOTE(review): several string literals in this file contain only text and
# blank lines. HTML markup may have been lost before this file reached
# review -- confirm against the deployed version before relying on them.

# Upper bound, in seconds, for fetching the article page. Without a timeout
# ``requests`` may wait forever on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 10

client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))


def main(url):
    """Fetch an article and build the content shown in the three UI tabs.

    The article paragraphs are selected from the page, split into sentences,
    and each sentence is labelled by ``fact_opinion_classifer``. The labelled
    sentences are then passed to the other ``lib`` helpers.

    Args:
        url: Address of the article page to analyse.

    Returns:
        A 6-tuple ``(fact_main_message, opinion_main_message,
        opinion_reason_html, fact_sentence, raw_data_html_txt,
        data_main_message)``, matching the order of the Gradio outputs.

    Raises:
        requests.RequestException: If the page cannot be fetched within
            ``REQUEST_TIMEOUT_SECONDS`` or the server returns an HTTP error
            status.
        ValueError: If the classifier returns a different number of labels
            than there are sentences.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail early instead of analysing the body of an error page.
    response.raise_for_status()

    soup = bs(response.text, 'html.parser')
    article_elems = soup.select('#uamods > .article_body > div > p')
    sentence_text = "\n".join(article_elem.text for article_elem in article_elems)

    # Split on sentence-ending punctuation and line breaks; drop empty pieces.
    stripped_parts = (part.strip() for part in re.split('。|？|\n|!', sentence_text))
    sentences = [part for part in stripped_parts if part != ""]

    fact_opinion_list = fact_opinion_classifer(client, sentences)
    if len(sentences) != len(fact_opinion_list):
        raise ValueError(f'GPTの回答の数が一致しませんでした。: {fact_opinion_list}, {sentences}')

    fact_main_message = sentence_text
    opinion_main_message = sentence_text
    opinions = []
    facts = []
    # NOTE(review): as written, each ``replace`` below substitutes a sentence
    # with itself. Presumably per-label highlight markup belongs in the
    # replacement strings -- confirm.
    for sentence, fact_opinion in zip(sentences, fact_opinion_list):
        if fact_opinion == "Opinion":
            opinion_main_message = opinion_main_message.replace(sentence, f'{sentence}')
            fact_main_message = fact_main_message.replace(sentence, f'{sentence}')
            opinions.append(sentence)
        elif fact_opinion == "Fact":
            fact_main_message = fact_main_message.replace(sentence, f'{sentence}')
            opinion_main_message = opinion_main_message.replace(sentence, f'{sentence}')
            facts.append(sentence)
        else:
            print(f'error: not known fact_opinion option: {fact_opinion}')

    # Wrap the text and widen every line break.
    opinion_main_message = '\n\n' + opinion_main_message.replace('\n', '\n\n') + '\n\n'
    fact_main_message = '\n\n' + fact_main_message.replace('\n', '\n\n') + '\n\n'

    html_format = "\n\n{message}\n\n"
    opinion_main_message = html_format.format(message=opinion_main_message)
    fact_main_message = html_format.format(message=fact_main_message)

    if not opinions:
        opinion_reason_text = "\n\nこの文章には意見に関する文が見つかりませんでした。\n\n"
        opinion_reason_html = f"\n\n{opinion_reason_text}\n\n"
    else:
        opinion_reasons = opinion_reason_classifer(client, opinions)
        all_opinion_data_html_format = "\n\n{all_data}"
        opinion_data_html_format = "\n\n{text}\n\n解釈\n\n{reason}\n\n"
        opinion_reason_text = "".join(
            opinion_data_html_format.format(text=opinion, reason=reason)
            for opinion, reason in zip(opinions, opinion_reasons)
        )
        opinion_reason_html = all_opinion_data_html_format.format(all_data=opinion_reason_text)

    fact_sentence = fact_summarizer(client, sentence_text)

    data_texts = get_data_info(client, facts)
    predicted_data = get_raw_data_info(client, data_texts)
    data_main_message, raw_data_html_txt = get_raw_data_html(
        sentence_text, data_texts, predicted_data)

    return (fact_main_message, opinion_main_message, opinion_reason_html,
            fact_sentence, raw_data_html_txt, data_main_message)


if __name__ == "__main__":
    # Build and launch the Gradio UI.
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a whitespace-collapsed source -- confirm against the
    # rendered UI.
    body_placeholder = "\n\nこちらに本文が表示されます。\n\n"
    analysis_placeholder = "\n\nこちらに分析内容が表示されます。\n\n"

    with gr.Blocks(
        title='情報リテラシーサポートツール',
        theme='shivi/calm_seafoam',
        css=(
            ' #title_{ text-align: center; }'
            ' #lightblue_img{ display: inline-block; width:15%; }'
            ' #lightblue_img img{ width:100%; }'
            ' @media screen and (max-width:480px) { #lightblue_img{ width:50%; } } '
        ),
    ) as demo:
        with gr.Row():
            with gr.Column():
                gr.HTML(value='\n\n情報リテラシーサポートツール\n\n')
                gr.Markdown('### ※意見文の抽出精度は 93.3% です。誤っている可能性には十分注意してください。')
                inp = gr.Textbox(label='', placeholder="こちらに記事のURLをコピペしてください。")
                btn = gr.Button("Enter")

        with gr.Row():
            with gr.Tab('事実'):
                with gr.Row():
                    with gr.Column(scale=1):
                        out_fact_0 = gr.HTML(body_placeholder)
                    with gr.Column(scale=1):
                        gr.Markdown("## 本文から確実に言えること")
                        out_fact_1 = gr.Markdown(analysis_placeholder)
            with gr.Tab('意見'):
                with gr.Row():
                    with gr.Column(scale=1):
                        out_opinion_0 = gr.HTML(body_placeholder)
                    with gr.Column(scale=1):
                        gr.Markdown("## 意見文分析")
                        out_opinion_1 = gr.HTML(analysis_placeholder)
            with gr.Tab('データ'):
                with gr.Row():
                    with gr.Column(scale=1):
                        out_data_0 = gr.HTML(body_placeholder)
                    with gr.Column(scale=1):
                        gr.Markdown("## 加工データ推測")
                        out_data_1 = gr.HTML(analysis_placeholder)

        with gr.Row():
            with gr.Column():
                gr.Markdown(value=' ⚠︎実行には10~30秒ほどかかります。ご了承ください。 *** ')
                description_jp = gr.Markdown(value=(
                    ' ## ニュース記事を読むにあたって総合的な情報リテラシーの不足のサポートを行うツールです'
                    ' ### ※情報リテラシーとは'
                    ' 1. 加工されていない生のデータが何か分かる'
                    ' 2. 事実と意見の区別がつく'
                    ' 3. 文章中から確実に言えることが何か分かる'
                    ' *** '
                ))
                gr.HTML(value=(
                    '\n\n本デモについて'
                    '\n\n作成意図'
                    '\n\n偽情報や誤情報といった情報は通常の20倍もの速度で拡散されるといいます。'
                    'このとき拡散する人はどうやら情報リテラシーが低い人が多いそうです。'
                    '\n\n情報リテラシーの改善には教育が急務ですがなかなか教材だけでは追いつかないことがあるかと思います。'
                    '\n\n本デモがそういった皆様の情報リテラシー向上の一助となれば幸いです。'
                    '\n\n作成者'
                    '\n\n東京大学工学部システム創成学科4年'
                    '\n\n堀川祐生'
                    '\n\n🔗 LinkedIn\n\n'
                ))

        # Both triggers run the same handler; the list order must match the
        # order of the tuple returned by ``main``.
        outputs = [out_fact_0, out_opinion_0, out_opinion_1,
                   out_fact_1, out_data_1, out_data_0]
        inp.submit(main, [inp], outputs)
        btn.click(main, [inp], outputs)

    demo.launch()
    # To require a login, launch with credentials instead:
    # demo.launch(auth=(os.getenv('USER_NAME'), os.getenv('PASSWORD')))