File size: 6,020 Bytes
ade7754
 
eba411a
e474be5
 
 
ade7754
5ea4696
2f06227
e474be5
 
 
2f06227
e474be5
5ea4696
2f06227
e474be5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3394cb3
e474be5
5ea4696
2f06227
56acf35
9f45f9d
 
 
 
 
 
 
 
 
 
 
 
 
238b0d2
f114c57
d45a3dd
f114c57
30821be
9f45f9d
b8daae8
89c8460
30821be
9f45f9d
aff682b
5ea4696
9288c3f
fcc76e7
 
5ea4696
fcc76e7
5ea4696
fcc76e7
9288c3f
 
 
 
fcc76e7
 
87f3f0e
 
 
fcc76e7
 
 
 
 
 
87f3f0e
 
 
 
 
 
 
 
 
 
 
 
 
 
3394cb3
 
 
5ea4696
87f3f0e
3394cb3
 
9288c3f
3394cb3
 
e22c9b6
9288c3f
3394cb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9288c3f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import streamlit as st
from transformers import pipeline
from TransformationQA_ZongqianLi.transformationqa import create_qa_database
from numpy import linspace
import jsonlines
import json

st.markdown("# 🎓 Auto-generating Question-Answering Datasets with Domain-Specific Knowledge for Language Models in Scientific Tasks", unsafe_allow_html=True)



##########
# Transformation Algorithm
##########
st.markdown('## 🖥️ QA Dataset Auto Generation', unsafe_allow_html=True)

# Selectable input files; each list currently offers a single bundled example.
cde_lst = ["./CDE_properties.jsonl"]
paper_lst = ["./reference_paper.json"]

# The selectbox result is stored in session state under 'cde'.
st.session_state['cde'] = st.selectbox("ChemDataExtractor generated database path:", cde_lst)

# Load the selected JSON Lines database once. The records are kept in
# `databases` and also used for the preview below, so what is displayed is
# exactly what was loaded. The file is only read, hence mode 'r'.
with open(st.session_state['cde'], 'r', encoding="utf-8") as f:
    databases = list(jsonlines.Reader(f))

st.write("Example of the ChemDataExtractor generated database: ")
for record_index, record in enumerate(databases):
    # An explicit key keeps widget IDs unique even when two records are
    # identical (identical label/value would otherwise collide).
    st.text_area(
        "",
        value=json.dumps(record, indent=4),
        height=100,
        key=f"cde_example_{record_index}",
    )

paper = st.selectbox("Paper collection path:", paper_lst)

        



##########
# Question Answering
##########
st.markdown('## 📖 Question Answering', unsafe_allow_html=True)
st.markdown('### Select a model: ', unsafe_allow_html=True)

# Model configuration options. Each selected value is one segment of the
# model identifier assembled below.
size_lst = ["-base", "-large"]
cased_lst = ["-cased", "-uncased"]
fpretrain_lst = ["None", "-scsmall", "-scmedium", "-sclarge"]
finetune_lst = ["-squad", "-scqa1", "-scqa2"]

# One drop-down menu per option.
size = st.selectbox("Choose a model size:", size_lst)
cased = st.selectbox("Whether distinguish upper and lowercase letters:", cased_lst)
fpretrain = st.selectbox("Further pretrained on a solar cell corpus:", fpretrain_lst)
finetune = st.selectbox("Finetuned on a QA dataset:", finetune_lst)

# Build the model name from the selections; the "None" choice contributes
# no further-pretraining segment.
fpretrain_suffix = "" if fpretrain == "None" else fpretrain
model = "".join(["ZongqianLi/bert", size, cased, fpretrain_suffix, finetune])

# Show the user which model was selected.
st.write("Your selected model:")
st.markdown(f'##### {model}', unsafe_allow_html=True)


def _load_qa_pipeline(model_name):
    """Return a question-answering pipeline for the model *model_name*."""
    return pipeline("question-answering", model=model_name)


# Streamlit re-executes this script on every widget interaction, so an
# uncached load would rebuild the model each time a button is pressed.
# When the running Streamlit provides `st.cache_resource`, reuse the loaded
# pipeline across reruns; `max_entries=1` keeps only the most recently
# selected model so memory use stays comparable to the uncached version.
# Without that API the loader simply runs uncached, as before.
if hasattr(st, "cache_resource"):
    _load_qa_pipeline = st.cache_resource(max_entries=1)(_load_qa_pipeline)

# Load the question-answering model.
pipe = _load_qa_pipeline(model)
st.markdown('### Answer the question: ', unsafe_allow_html=True)

# Example from the solar cell QA dataset; also the initial default.
_SOLAR_CELL_QUESTION = "What is the value of FF?"
_SOLAR_CELL_CONTEXT = "The referential DSSC with Pt CE was also measured under the same conditions, which yields η of 6.66% (Voc= 0.78 V, Jsc= 13.0 mA cm−2, FF = 65.9%)."

# Example from the SQuAD dataset.
_SQUAD_QUESTION = "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?"
_SQUAD_CONTEXT = """Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary."""

# Session state stores the default question and context so that the choice
# made with the example buttons survives Streamlit's script reruns.
if 'default_question' not in st.session_state:
    st.session_state['default_question'] = _SOLAR_CELL_QUESTION
if 'default_context' not in st.session_state:
    st.session_state['default_context'] = _SOLAR_CELL_CONTEXT

col1, col2 = st.columns(2)

# Each button replaces the stored defaults with one example pair.
with col1:
    if st.button("Solar Cell QA Dataset"):
        st.session_state['default_question'] = _SOLAR_CELL_QUESTION
        st.session_state['default_context'] = _SOLAR_CELL_CONTEXT

with col2:
    if st.button("SQuAD Dataset QA"):
        st.session_state['default_question'] = _SQUAD_QUESTION
        st.session_state['default_context'] = _SQUAD_CONTEXT

# The input fields are pre-filled from the defaults held in session state.
question = st.text_input("Enter the question: ", value=st.session_state['default_question'])
context = st.text_area("Enter the context: ", value=st.session_state['default_context'], height=100)

# Run the question-answering model when the user clicks the button.
if st.button('Extract the answer'):
    if context and question:
        out = pipe({
            'question': question,
            'context': context
        })
        answer = out["answer"]
        st.write(f"Question: {question}")
        st.write(f"Answer: {answer}")
    else:
        st.write("Please enter both a question and context.")

##########
# Property Extraction
##########
st.markdown('## 📤 Property Extraction', unsafe_allow_html=True)

default_property = "FF"
default_context = "The referential DSSC with Pt CE was also measured under the same conditions, which yields η of 6.66% (Voc= 0.78 V, Jsc= 13.0 mA cm−2, FF = 65.9%)."

# Read the property name and the text to search from the user.
# (`property_name` rather than `property`, which would shadow the builtin.)
property_name = st.text_input("Enter the name of the property: ", value=default_property)
context = st.text_area("Enter the paper: ", value=default_context, height=100)

# Run the two-turn extraction when the user clicks the button.
if st.button('Extract the property'):
    # Validate the raw inputs: the generated question is never empty, so
    # checking it instead of the property name would always pass.
    if context.strip() and property_name.strip():
        # Turn 1: ask for the value of the property.
        question_1 = f"What is the value of {property_name}?"
        out = pipe({
            'question': question_1,
            'context': context
        })
        value = out["answer"]
        st.write(f"First-turn question: {question_1}")
        st.write(f"First-turn answer: {value}")

        # Turn 2: ask which material the extracted value belongs to.
        question_2 = f"What material has {property_name} of {value}?"
        out = pipe({
            'question': question_2,
            'context': context
        })
        material = out["answer"]
        st.write(f"Second-turn question: {question_2}")
        st.write(f"Second-turn answer: {material}")
    else:
        st.write("Please enter both a property name and context.")