# Streamlit demo: auto-generating question-answering datasets with
# domain-specific knowledge for language models in scientific tasks.
import streamlit as st
from transformers import pipeline
from TransformationQA_ZongqianLi.transformationqa import create_qa_database
from numpy import linspace
import jsonlines
import json
st.markdown("# 🎓 Auto-generating Question-Answering Datasets with Domain-Specific Knowledge for Language Models in Scientific Tasks", unsafe_allow_html=True)
##########
# Question Answering
##########
st.markdown('## 📖 Question Answering', unsafe_allow_html=True)
st.markdown('### Select a model: ', unsafe_allow_html=True)
# Model configuration options. Each label is "<suffix> (<description>)"; the
# token before the first " (" is the suffix used to build the HF model id.
size_lst = ["-base (110 million parameters)", "-large (340 million parameters)"]
cased_lst = ["-cased (distinguish upper and lowercase letters)", "-uncased (not distinguish upper and lowercase letters)"]
fpretrain_lst = ["None", "-scsmall (further pretrained on Solar Cell Corpus Small)", "-scmedium (further pretrained on Solar Cell Corpus Medium)", "-sclarge (further pretrained on Solar Cell Corpus Large)"]
finetune_lst = ["-squad (finetuned on the SQuAD dataset)", "-squadscqa1 (finetuned on SQuAD and Solar Cell QA Dataset (first-turn QA pairs))", "-scqa1 (finetuned on Solar Cell QA Dataset (first-turn QA pairs))", "-scqa2 (finetuned on the Solar Cell QA Dataset (whole))"]
# One dropdown per configuration axis.
st.markdown('###### bert', unsafe_allow_html=True)
size = st.selectbox("Choose a model size:", size_lst)
cased = st.selectbox("Whether distinguish upper and lowercase letters:", cased_lst)
fpretrain = st.selectbox("Further pretrained on a solar cell corpus:", fpretrain_lst)
finetune = st.selectbox("Finetuned on a QA dataset:", finetune_lst)
# Assemble the Hugging Face model id from the selected suffixes. The
# further-pretraining suffix is only included when one was chosen.
name_parts = ["CambridgeMolecularEngineering/bert", size.split(" (")[0], cased.split(" (")[0]]
if fpretrain != "None":
    name_parts.append(fpretrain.split(" (")[0])
name_parts.append(finetune.split(" (")[0])
model = "".join(name_parts)
# Show the user which model id was selected.
st.write("Your selected model:")
st.markdown(f'###### {model}', unsafe_allow_html=True)


@st.cache_resource
def _load_qa_pipeline(model_name):
    """Load and cache the HF question-answering pipeline for *model_name*.

    Caching avoids re-downloading/re-initialising the model on every
    Streamlit rerun (the whole script re-executes on each interaction).
    """
    return pipeline("question-answering", model=model_name)


pipe = _load_qa_pipeline(model)
st.markdown('### Answer the question: ', unsafe_allow_html=True)
# Session state holds the default question/context so the example buttons
# below can overwrite them before the input widgets are rendered.
if 'default_question' not in st.session_state:
    st.session_state['default_question'] = "What is the value of FF?"
if 'default_context' not in st.session_state:
    st.session_state['default_context'] = "The referential DSSC with Pt CE was also measured under the same conditions, which yields η of 6.66% (Voc= 0.78 V, Jsc= 13.0 mA cm−2, FF = 65.9%)."
col1, col2 = st.columns(2)
with col1:
    # Example drawn from the Solar Cell QA Dataset.
    if st.button("Solar Cell QA Dataset"):
        st.session_state['default_question'] = "What is the value of FF?"
        st.session_state['default_context'] = "The referential DSSC with Pt CE was also measured under the same conditions, which yields η of 6.66% (Voc= 0.78 V, Jsc= 13.0 mA cm−2, FF = 65.9%)."
with col2:
    # Example drawn from SQuAD (button label typo fixed: was "SuAD Dataset QA").
    if st.button("SQuAD Dataset QA"):
        st.session_state['default_question'] = "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?"
        st.session_state['default_context'] = """Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary."""
# Input widgets are pre-filled from session state so the buttons above take effect.
question = st.text_input("Enter the question: ", value=st.session_state['default_question'])
context = st.text_area("Enter the context: ", value=st.session_state['default_context'], height=100)
# Run extractive QA once the user asks for an answer.
if st.button('Extract the answer'):
    if context and question:
        out = pipe({
            'question': question,
            'context': context
        })
        answer = out["answer"]
        st.write(f"Question: {question}")
        st.write(f"Answer: {answer}")
    else:
        st.write("Please enter both a question and context.")
##########
# Property Extraction
##########
st.markdown('## 📤 Property Extraction', unsafe_allow_html=True)
default_property = "FF"
default_context = "The referential DSSC with Pt CE was also measured under the same conditions, which yields η of 6.66% (Voc= 0.78 V, Jsc= 13.0 mA cm−2, FF = 65.9%)."
# User inputs: the property to extract and the paper text to search.
# (renamed from `property` to avoid shadowing the builtin)
property_name = st.text_input("Enter the name of the property: ", value=default_property)
context = st.text_area("Enter the paper: ", value=default_context, height=100)
# Two-turn extraction: first find the property's value, then use that value
# to ask which material it belongs to.
if st.button('Extract the property'):
    question_1 = f"What is the value of {property_name}?"
    # question_1 is always non-empty, so only the context needs checking.
    if context:
        out = pipe({
            'question': question_1,
            'context': context
        })
        value = out["answer"]
        st.write(f"First-turn question: {question_1}")
        st.write(f"First-turn answer: {value}")
        question_2 = f"What material has {property_name} of {value}?"
        out = pipe({
            'question': question_2,
            'context': context
        })
        material = out["answer"]
        st.write(f"Second-turn question: {question_2}")
        # Bug fix: this line previously said "First-turn answer".
        st.write(f"Second-turn answer: {material}")
    else:
        st.write("Please enter both a question and context.")
##########
# Transformation Algorithm
##########
def _jsonl_to_pretty_json(file_path):
    """Read a JSON-Lines file and return its records as one pretty-printed JSON array string.

    Each non-blank line of *file_path* is parsed as a JSON object; the
    resulting list is serialised with indent=4 for display.
    """
    records = []
    with open(file_path, 'r', encoding="utf-8") as file:
        for line in file:
            records.append(json.loads(line.strip()))
    return json.dumps(records, indent=4)


st.markdown('## 🖥️ QA Dataset Auto Generation', unsafe_allow_html=True)
st.markdown('### Algorithm Input', unsafe_allow_html=True)
st.markdown("###### Example of the ChemDataExtractor generated database: ", unsafe_allow_html=True)
st.text_area("", value=_jsonl_to_pretty_json("./CDE_properties.jsonl"), height=200)
st.markdown('###### Example of the paper collection: ', unsafe_allow_html=True)
# Plain JSON (not JSONL); read-only access — the original opened in 'r+'
# (read/write) unnecessarily.
with open("./reference_paper.json", 'r', encoding="utf-8") as f:
    json_string = json.dumps(json.load(f), indent=4)
st.text_area("", value=json_string, height=200)
st.markdown('### Algorithm Output', unsafe_allow_html=True)
st.markdown('###### Extracted first-turn QA pairs: ', unsafe_allow_html=True)
st.text_area("", value=_jsonl_to_pretty_json("./output_qa.jsonl"), height=200)
st.markdown('###### Extracted second-turn QA pairs: ', unsafe_allow_html=True)
st.text_area("", value=_jsonl_to_pretty_json("output_secondqa.jsonl"), height=200)