Spaces:

Aye10032
/

MyTools

Sleeping

App Files Files Community

Aye10032 committed on May 22, 2024

Commit

529e208

1 Parent(s): 02fb4e4

update

Browse files

Files changed (12) hide show

.gitignore +1 -0
.streamlit/config.toml +6 -0
App.py +23 -0
README.md +1 -1
pages/Reference.py +196 -0
pages/Reformat.py +65 -0
pages/TextToImage.py +92 -0
pages/Translate.py +137 -0
requirements.txt +6 -0
ui/Component.py +13 -0
utils/Decorator.py +41 -0
utils/__init__.py +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ /config.yaml

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,6 @@

+[browser]
+gatherUsageStats = false
+[client]
+showSidebarNavigation = false
+toolbarMode = "auto"

App.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import streamlit as st
+from ui.Component import side_bar_links
+# Page-level configuration: browser tab title and icon.
+st.set_page_config(
+    page_title='工具箱',
+    page_icon='🔨',
+)
+# Draw the shared navigation links inside the sidebar.
+with st.sidebar:
+    side_bar_links()
+# Landing text: a short description of the text-reformatting tool and the
+# reference-generation tool.
+st.markdown("""
+# 自用小工具
+## 文本格式化
+将PDF中直接复制的文本中的换行符去除，并将引用转化为markdown格式。
+## 引用文献生成
+处理PUBMED的NLM格式的引用文献，并转化为yaml格式，方便存储在markdown文件中
+""")

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ colorFrom: red
 colorTo: yellow
 sdk: streamlit
 sdk_version: 1.34.0
-app_file: app.py
 pinned: false
 license: gpl-3.0
 ---

 colorTo: yellow
 sdk: streamlit
 sdk_version: 1.34.0
+app_file: App.py
 pinned: false
 license: gpl-3.0
 ---

pages/Reference.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import random
+from typing import Dict, Any
+import requests
+import streamlit as st
+import yaml
+from loguru import logger
+from ui.Component import side_bar_links
+from bs4 import BeautifulSoup
+from utils.Decorator import retry
+# Page-level configuration: tab title, icon and wide layout.
+st.set_page_config(
+    page_title='工具箱',
+    page_icon='🔨',
+    layout='wide',
+)
+# Draw the shared navigation links inside the sidebar.
+with st.sidebar:
+    side_bar_links()
+def add():
+    ref_list: list = st.session_state.get('reference_list')
+    _data = {
+        'title': st.session_state.get('title'),
+        'pmid': st.session_state.get('pmid').replace('PMID:', '').replace(' ', ''),
+        'pmc': st.session_state.get('pmc').replace('PMCID:', '').replace(' ', ''),
+        'doi': st.session_state.get('doi').replace('DOI:', '').replace(' ', ''),
+    }
+    if _data in ref_list:
+        st.toast('already exist')
+    else:
+        ref_list.append(_data)
+        st.session_state.reference_list = ref_list
+        yaml_str = yaml.dump(ref_list)
+        st.session_state.reference_text = yaml_str
+        st.session_state['title'] = ''
+        st.session_state['pmid'] = ''
+        st.session_state['pmc'] = ''
+        st.session_state['doi'] = ''
+def reset():
+    st.session_state.reference_list = []
+    st.session_state.reference_text = ''
+# def anal_ml():
+#     nlm_str: str = st.session_state.get('nlm_text')
+#     nlm_list = nlm_str.split('.', 4)
+#     title = nlm_list[1]
+#     id_list = nlm_list[-1].split('; ')
+#     if len(id_list) > 1:
+#         pmc = id_list[-1]
+#     else:
+#         pmc = ''
+#     base_list = id_list[0].split('. ')
+#     doi = base_list[0]
+#     pmid = base_list[1]
+#
+#     _data = {
+#         'title': title[1:] if title.startswith(' ') else title,
+#         'pmid': pmid.replace('PMID:', '').replace(' ', ''),
+#         'pmc': pmc.replace('PMCID:', '').replace(' ', '').replace('.', ''),
+#         'doi': doi.replace('doi:', '').replace(' ', ''),
+#     }
+#
+#     ref_list: list = st.session_state.get('reference_list')
+#
+#     if _data in ref_list:
+#         st.toast('already exist')
+#     else:
+#         ref_list.append(_data)
+#         st.session_state.reference_list = ref_list
+#         yaml_str = yaml.dump(ref_list)
+#         st.session_state.reference_text = yaml_str
+#
+#     st.session_state['nlm_text'] = ''
+def get_data():
+    term: str = st.session_state.get('term_text')
+    term = term.replace('\r', ' ').replace('\n', '')
+    _data = __get_info(term)
+    ref_list: list = st.session_state.get('reference_list')
+    if _data in ref_list:
+        st.toast('already exist')
+    else:
+        ref_list.append(_data)
+        st.session_state.reference_list = ref_list
+        yaml_str = yaml.dump(ref_list, width=999)
+        st.session_state.reference_text = yaml_str
+    st.session_state['term_text'] = ''
+@retry(delay=random.uniform(2.0, 5.0))
+def __get_info(pmid: str) -> Dict[str, Any]:
+    url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={pmid}&retmode=xml'
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+    }
+    response = requests.request("GET", url, headers=headers, timeout=10)
+    if response.status_code == 200:
+        soup = BeautifulSoup(response.text, 'xml')
+        title = soup.find('Article').find('ArticleTitle').text if soup.find('Article') else None
+        doi_block = soup.find('ArticleIdList').find('ArticleId', {'IdType': 'doi'})
+        if doi_block:
+            doi = doi_block.text
+        else:
+            doi = ''
+            logger.warning('DOI not found')
+        pmc_block = soup.find('ArticleIdList').find('ArticleId', {'IdType': 'pmc'})
+        if pmc_block:
+            pmc = pmc_block.text.replace('PMC', '')
+        else:
+            pmc = ''
+        return {
+            'title': title,
+            'pmid': pmid,
+            'pmc': pmc,
+            'doi': doi
+        }
+def del_item():
+    index: int = st.session_state.get('delete_id')
+    ref_list: list = st.session_state.get('reference_list')
+    ref_list.pop(index)
+    yaml_str = yaml.dump(ref_list, width=999)
+    st.session_state.reference_text = yaml_str
+    st.session_state.reference_list = ref_list
+st.title("引用格式化")
+col1, col2 = st.columns([1, 1], gap="medium")
+if 'reference_list' not in st.session_state:
+    st.session_state.reference_list = []
+if 'reference_text' not in st.session_state:
+    st.session_state.reference_text = ''
+with col1:
+    with st.expander('manual'):
+        st.text_input('title', key='title')
+        col1_1, col1_2 = st.columns([1, 1], gap="small")
+        col1_1.text_input('pmid', key='pmid')
+        col1_2.text_input('pmc', key='pmc')
+        st.text_input('doi', key='doi')
+        col2_1, col2_2 = st.columns([1, 1], gap="small")
+        col2_1.button('add', use_container_width=True, type='primary', on_click=add)
+        col2_2.button('reset', use_container_width=True, on_click=reset)
+    st.text_input('Search', key='term_text')
+    st.button('add', use_container_width=True, on_click=get_data)
+    if len(st.session_state.get('reference_list')) > 0:
+        st.divider()
+        st.write('共有', len(st.session_state.get('reference_list')), '条引用')
+        col3_1, col3_2 = st.columns([2, 1], gap='small')
+        col3_1.number_input(
+            'id',
+            min_value=0,
+            max_value=len(st.session_state.get('reference_list')) - 1,
+            key='delete_id',
+            label_visibility='collapsed'
+        )
+        col3_2.button('delete', type='primary', on_click=del_item)
+with col2:
+    with st.container(height=486, border=True):
+        st.write(st.session_state.get('reference_list'))
+st.code(st.session_state.get('reference_text'), language='yaml')

pages/Reformat.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import re
+import streamlit as st
+from ui.Component import side_bar_links
+# Page-level configuration: tab title, icon and wide layout.
+st.set_page_config(
+    page_title='工具箱',
+    page_icon='🔨',
+    layout='wide',
+)
+# Draw the shared navigation links inside the sidebar.
+with st.sidebar:
+    side_bar_links()
+st.title("格式化工具")
+def re_format(origin_str: str) -> str:
+    new_str = origin_str.replace('\r', '').replace('\n', '')
+    matches = re.findall(r'\[\s*\d+(?:,\s*\d+)*]', new_str)
+    for match in matches:
+        match_str: str = match
+        new_ref = ''.join([
+            f"[^{ind.replace(' ', '')}]"
+            for ind in match_str.replace('[', '').replace(']', '').split(',')
+        ])
+        new_str = new_str.replace(match, new_ref)
+    matches = re.findall(r'\[\s*\d+(?:-\s*\d+)*]', new_str)
+    for match in matches:
+        match_str: str = match
+        match_str = match_str.replace('[', '').replace(']', '')
+        a = int(match_str.split('-')[0].strip())
+        b = int(match_str.split('-')[-1].strip())
+        new_ref = ''.join([
+            f'[^{i}]'
+            for i in range(a, b + 1)
+        ])
+        new_str = new_str.replace(match, new_ref)
+    return new_str
+col1, col2 = st.columns([1, 1], gap="medium")
+# Seed the stored result on first load.
+if 'markdown_text' not in st.session_state:
+    st.session_state.markdown_text = ''
+# Left: rendered markdown. Right: the same text as copyable source.
+with col1.container(height=520, border=True):
+    st.markdown(st.session_state.markdown_text)
+with col2:
+    st.code(st.session_state.markdown_text, language='markdown')
+# Reformat the submitted text, store it, and rerun so both panes show it.
+if prompt := st.chat_input():
+    response = re_format(prompt)
+    st.session_state.markdown_text = response
+    st.rerun()

pages/TextToImage.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import os
+import requests
+import urllib3
+import streamlit as st
+from loguru import logger
+from ui.Component import side_bar_links
+# Page-level configuration: tab title, icon and wide layout.
+st.set_page_config(
+    page_title='工具箱',
+    page_icon='🔨',
+    layout='wide',
+)
+with st.sidebar:
+    side_bar_links()
+    # Masked input; the value is read later from st.session_state['api_key'].
+    st.text_input('Api_key', type='password', key='api_key')
+st.title('CogView 文生图')
+def generate_image_url(prompt: str) -> str:
+    from zhipuai import ZhipuAI
+    api = st.session_state.get('api_key')
+    if api != '':
+        client = ZhipuAI(api_key=api)  # 请填写您自己的APIKey
+        response = client.images.generations(
+            model="cogview-3",
+            prompt=prompt,
+        )
+        return response.data[0].url
+    else:
+        st.error('请先输入API！')
+def download_img(img_url: str) -> str:
+    r = requests.get(img_url, stream=True)
+    if r.status_code == 200:
+        filename = img_url.split('/')[-1]
+        filepath = f'/home/aye/Service/MyTools/image/{filename}'
+        open(filepath, 'wb').write(r.content)
+        del r
+        return filepath
+    else:
+        st.error('download fail')
+if 'filepath' not in st.session_state:
+    st.session_state['filepath'] = ''
+if os.path.exists(st.session_state.get('filepath')):
+    with st.chat_message('user'):
+        st.write(st.session_state.get('image_prompt'))
+    with st.chat_message('ai'):
+        path: str = st.session_state.get('filepath')
+        st.image(path)
+        with open(path, "rb") as file:
+            btn = st.download_button(
+                label="下载",
+                data=file,
+                file_name=path.split('/')[-1],
+                mime="image/png"
+            )
+if image_prompt := st.chat_input(key='image_prompt'):
+    with st.chat_message('user'):
+        logger.info(image_prompt)
+        st.write(image_prompt)
+    with st.spinner('正在生成图片...'):
+        url = generate_image_url(image_prompt)
+        logger.info(url)
+    with st.spinner('正在下载图片...'):
+        path = download_img(url)
+        st.session_state['filepath'] = path
+    with st.chat_message('ai'):
+        st.image(path)
+        with open(path, "rb") as file:
+            btn = st.download_button(
+                label="下载",
+                data=file,
+                file_name=url.split('/')[-1],
+                mime="image/png"
+            )

pages/Translate.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import os
+import httpx
+import streamlit as st
+import yaml
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai import ChatOpenAI
+from loguru import logger
+from ui.Component import side_bar_links
+# Page-level configuration: tab title, icon and wide layout.
+st.set_page_config(
+    page_title='工具箱',
+    page_icon='🔨',
+    layout='wide',
+)
+st.title("一键生成翻译总结")
+with st.sidebar:
+    side_bar_links()
+    # Option toggles, read later through st.session_state:
+    # strip line breaks / add a summary / choose the output format.
+    st.toggle('去除换行', key='trans_reformat')
+    st.toggle('总结', key='trans_conclusion')
+    st.toggle('输出格式', key='trans_text_mode')
+    # Caption shows the selected output format: on -> markdown, off -> latex.
+    if st.session_state.get('trans_text_mode'):
+        st.caption('markdown')
+    else:
+        st.caption('latex')
+def get_translate_and_conclude(question: str, step: int):
+    if step == 0:
+        _prompt = ChatPromptTemplate.from_messages(
+            [
+                SystemMessage("You are an AI academic assistant and should answer user questions rigorously."),
+                ("human",
+                 "你将收到一个论文的片段。首先，将这段文本以学术风格**翻译为中文**，不要漏句。对于所有的特殊符号和latex代码，请保持原样不要改变。"
+                 "对于文中一些显得与上下文突兀的数字，很大可能是引用文献，请使用latex语法将它们表示为一个上标，并使用美元符号包围，如$^2$。这是你要翻译的文献片段:\n{question}"),
+            ]
+        )
+    elif step == 1:
+        _prompt = ChatPromptTemplate.from_messages(
+            [
+                SystemMessage(content="You are an AI academic assistant and should answer user questions rigorously."),
+                HumanMessage(
+                    content=f"""首先，将这段文本**翻译为中文**，不要漏句。对于所有的特殊符号和latex代码，请保持原样不要改变:
+                    {st.session_state.translate_messages[-3]}"""
+                ),
+                AIMessage(content=str(st.session_state.translate_messages[-2])),
+                HumanMessage(content=question),
+            ]
+        )
+    else:
+        raise Exception("Wrong step value")
+    llm = ChatOpenAI(
+        model_name="gpt-3.5-turbo",
+        temperature=0,
+        openai_api_key=st.secrets['gpt_key'],
+        streaming=True
+    )
+    chain = _prompt | llm
+    if step == 0:
+        llm_result = chain.stream({"question": question})
+    else:
+        llm_result = chain.stream({"question": question})
+    return llm_result
+col1, col2 = st.columns([1, 1], gap="medium")
+# Seed the per-session storage on first load.
+if 'translate_messages' not in st.session_state:
+    st.session_state.translate_messages = []
+if 'markdown_text' not in st.session_state:
+    st.session_state.markdown_text = ''
+# Left column: replay the stored conversation.
+chat_container = col1.container(height=610, border=False)
+with chat_container:
+    for message in st.session_state.translate_messages:
+        # NOTE(review): ``icon`` is assigned but never used below.
+        icon = 'logo.png' if message['role'] != 'user' else None
+        with st.chat_message(message['role']):
+            st.markdown(message['content'])
+# Right column: rendered result plus a copyable code view in the chosen format.
+with col2:
+    if st.session_state.markdown_text != '':
+        with st.container(height=520, border=True):
+            st.markdown(st.session_state.markdown_text)
+        if st.session_state.get('trans_text_mode'):
+            st.code(st.session_state.markdown_text, language='markdown')
+        else:
+            st.code(st.session_state.markdown_text, language='latex')
+if prompt := st.chat_input():
+    # Each submission starts a fresh conversation.
+    st.session_state.translate_messages = []
+    if st.session_state.get('trans_reformat'):
+        prompt = prompt.replace("\n", " ").replace("\r", "")
+    logger.info(f'[translate]: {prompt}')
+    # Escape dollar signs in the submitted text.
+    prompt = prompt.replace('$', r'\$')
+    chat_container.chat_message("human").write(prompt)
+    st.session_state.translate_messages.append({'role': 'user', 'content': prompt})
+    # Step 0: translation, streamed into the chat column.
+    response = get_translate_and_conclude(prompt, 0)
+    translate_result = chat_container.chat_message("ai").write_stream(response)
+    st.session_state.translate_messages.append({'role': 'assistant', 'content': translate_result})
+    if st.session_state.get('trans_conclusion'):
+        # Step 1: ask for a two-to-four sentence summary of the same text.
+        query = "接下来，请用两到四句话总结一下这段文本的内容"
+        chat_container.chat_message("human").write(query)
+        st.session_state.translate_messages.append({'role': 'user', 'content': query})
+        response = get_translate_and_conclude(query, 1)
+        conclusion_result = chat_container.chat_message("ai").write_stream(response)
+        logger.info(f"(conclude): {conclusion_result}")
+        st.session_state.translate_messages.append({'role': 'assistant', 'content': conclusion_result})
+        # Assemble the final text: markdown puts the summary in a blockquote,
+        # latex wraps it in \tbox{...} and escapes percent signs.
+        if st.session_state.get('trans_text_mode'):
+            markdown_text = f"""{prompt}\t\r\n{translate_result}\t\r\n> {conclusion_result}"""
+        else:
+            markdown_text = f"""{prompt}\n\n{translate_result}\n\n\\tbox{{ {conclusion_result} }}"""
+            markdown_text = markdown_text.replace('%', r'\%')
+        st.session_state.markdown_text = markdown_text
+    else:
+        markdown_text = f"""{prompt}\t\r\n{translate_result}"""
+        st.session_state.markdown_text = markdown_text
+    # Rerun so the right-hand column shows the stored result.
+    st.rerun()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+loguru
+bs4
+PyYAML
+langchain
+langchain_openai
+lxml

ui/Component.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import streamlit as st
+def side_bar_links():
+    st.header('工具箱')
+    st.page_link('App.py', label='Home', icon='🏠')
+    st.page_link('pages/Reformat.py', label='文本格式化', icon='📖')
+    st.page_link('pages/Reference.py', label='引用文献生成', icon='📙')
+    st.page_link('pages/Translate.py', label='翻译总结工具', icon='🌐')
+    st.page_link('pages/TextToImage.py', label='文生图', icon='🎨')
+    st.divider()

utils/Decorator.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import time
+from functools import wraps
+from typing import Callable, Any
+from loguru import logger
+def retry(retries: int = 3, delay: float = 1) -> Callable:
+    """
+    为函数提供重试逻辑的装饰器。
+    参数:
+    retries (int): 最大重试次数，默认为3。
+    delay (float): 两次重试之间的延迟时间（秒），默认为1。
+    返回:
+    Callable: 被装饰的函数。
+    异常:
+    ValueError: 如果retries小于1或delay小于等于0，则抛出此异常。
+    """
+    if retries < 1 or delay <= 0:
+        raise ValueError('Wrong param')
+    def decorator(func: Callable) -> Callable:
+        @wraps(func)
+        def wrapper(*args, **kwargs) -> Any:
+            for i in range(1, retries + 1):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    if i == retries:
+                        logger.error(f'Error: {repr(e)}.')
+                        logger.error(f'"{func.__name__}()" failed after {retries} retries.')
+                        break
+                    else:
+                        logger.debug(f'Error: {repr(e)} -> Retrying...')
+                        time.sleep(delay)
+        return wrapper
+    return decorator

utils/__init__.py ADDED Viewed

File without changes