st / app.py
lijk20's picture
Upload 2 files
c9e13b4
raw
history blame
26.8 kB
import io
import docx
import configparser
import pandas as pd
import asyncio
from docx import Document
from docxtpl import DocxTemplate
from docx.shared import Pt
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.enum.dml import MSO_THEME_COLOR_INDEX
from docx.enum.style import WD_STYLE_TYPE
from docx.shared import Cm, Inches
from docx.oxml.shared import OxmlElement
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
from docx.shared import RGBColor
from docx.enum.text import WD_COLOR_INDEX
from requests_toolbelt import MultipartEncoder
from datetime import datetime, timedelta
def count_values(df, col_name):
    """Tally how often each distinct value occurs in one column.

    :param df: DataFrame containing the column to tally.
    :param col_name: Name of the column whose values are counted.
    :return: A new DataFrame with two columns, ``col_name`` (the distinct
        values) and ``'count'`` (their frequencies), in the order produced
        by ``Series.value_counts``.
    """
    frequencies = df[col_name].value_counts()
    # Naming the index explicitly makes reset_index emit a ``col_name``
    # column regardless of how value_counts labelled its result.
    return frequencies.rename_axis(col_name).reset_index(name='count')
def add_hyperlink(paragraph, url, text):
    """
    Append an external hyperlink to a paragraph.

    :param paragraph: The paragraph the hyperlink is added to.
    :param url: The target address of the link.
    :param text: The text displayed for the link.
    :return: The Run object that wraps the hyperlink element.
    """
    # Register the URL as an external relationship of the paragraph's part
    # and keep the relationship id that the hyperlink element must point at.
    rel_id = paragraph.part.relate_to(url, RT.HYPERLINK, is_external=True)

    # <w:hyperlink r:id="..." w:history="1">
    link_el = OxmlElement('w:hyperlink')
    link_el.set(qn('r:id'), rel_id)
    link_el.set(qn('w:history'), '1')

    # Run properties referencing the 'Hyperlink' character style. The style
    # only has an effect when the document/template actually defines it.
    style_el = OxmlElement('w:rStyle')
    style_el.set(qn('w:val'), 'Hyperlink')
    props_el = OxmlElement('w:rPr')
    props_el.append(style_el)

    # Inner run carrying the visible text, nested inside the hyperlink.
    inner_run = OxmlElement('w:r')
    inner_run.append(props_el)
    inner_run.text = text
    link_el.append(inner_run)

    # Host the hyperlink element in a freshly added run of the paragraph.
    wrapper = paragraph.add_run()
    wrapper._r.append(link_el)

    # Fallback formatting for documents without a 'Hyperlink' style: theme
    # hyperlink colour plus underline, applied to the wrapping run.
    wrapper.font.color.theme_color = MSO_THEME_COLOR_INDEX.HYPERLINK
    wrapper.font.underline = True
    return wrapper
def create_table(document,count_df1):
    """Append a 2x2 bordered overview table of per-category item counts.

    The first row holds two centred, bold column headers; the second row
    lists the category labels with their counts, left-aligned.

    :param document: python-docx document the table is appended to.
    :param count_df1: Indexable collection of counts; positions 0-6 fill the
        left cell and position 7 fills the right cell.
    :return: None; the table is added to ``document`` in place.
    """
    face = "思源黑体 Regular"
    alignment = docx.enum.text.WD_PARAGRAPH_ALIGNMENT

    def fill_cell(cell, content, align, bold=False):
        # Write the text, align its paragraph, then format the single run
        # that holds the text.
        cell.text = content
        paragraph = cell.paragraphs[0]
        paragraph.alignment = align
        first_run = paragraph.runs[0]
        run_font = first_run.font
        run_font.name = face
        # The East Asian font face has to be set on the run's rFonts element.
        first_run._element.rPr.rFonts.set(qn('w:eastAsia'), face)
        run_font.size = docx.shared.Pt(8)
        if bold:
            run_font.bold = True

    table = document.add_table(rows=2, cols=2)
    # Both columns share the same width.
    for column in table.columns:
        column.width = docx.shared.Inches(3.7)
    # Bordered grid style.
    table.style = 'Table Grid'
    # Height of the header row.
    table.rows[0].height = docx.shared.Pt(9)

    # Header row.
    header_left, header_right = table.rows[0].cells[0], table.rows[0].cells[1]
    fill_cell(header_left, "技术进展", alignment.CENTER, bold=True)
    fill_cell(header_right, "业内动态", alignment.CENTER, bold=True)

    # Body row: one line per category in the left cell, one in the right.
    tech_counts = [count_df1[position] for position in range(7)]
    tech_text = '''\t图像理解与生成 \t{0}项\n\t计算光学 \t{1}项\n\t图像处理 \t{2}项\n\t机器学习前沿 \t{3}项\n\t自然语言交互 \t{4}项\n\t量子计算 \t{5}项\n\t计算机视觉前沿 \t{6}项'''.format(*tech_counts)
    industry_text = "\t大厂动态 \t{0}项\n".format(count_df1[7])
    body_left, body_right = table.rows[1].cells[0], table.rows[1].cells[1]
    fill_cell(body_left, tech_text, alignment.LEFT)
    fill_cell(body_right, industry_text, alignment.LEFT)
def _ensure_style(document, name, style_type, font_name, size_pt, bold,
                  base=None, color=None, underline=None, highlight=None):
    """Fetch the named style, creating it when absent, and apply its font settings."""
    styles = document.styles
    try:
        style = styles[name]
    except KeyError:
        # The template does not define this style yet.
        style = styles.add_style(name, style_type)
    if base is not None:
        style.base_style = styles[base]
    font = style.font
    font.name = font_name
    # The East Asian face is not covered by font.name, so set it explicitly.
    style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    if color is not None:
        font.color.rgb = RGBColor(*color)
    font.bold = bold
    font.size = Pt(size_pt)
    if underline is not None:
        font.underline = underline
    if highlight is not None:
        font.highlight_color = highlight
    return style


def _define_report_styles(document):
    """Create or refresh every named style the report paragraphs refer to."""
    regular = "思源黑体 Regular"
    heavy = "思源黑体 Bold"
    light = "思源黑体 Light"
    para = WD_STYLE_TYPE.PARAGRAPH
    char = WD_STYLE_TYPE.CHARACTER
    black = (0, 0, 0)

    # Date range shown at the top of the report.
    _ensure_style(document, 'date_range', para, regular, 14, True)
    # Chapter titles; 'title' is based on 'Heading 1', 'title2' is the plain variant.
    _ensure_style(document, 'title2', para, heavy, 16, True)
    _ensure_style(document, 'title', para, heavy, 16, True,
                  base='Heading 1', color=black)
    # Underlined sub-heading inside the highlights chapter.
    _ensure_style(document, 'tech_progress', para, regular, 9, False,
                  underline=True)
    # Grey note / "see details" lines.
    _ensure_style(document, 'content', para, regular, 9, False,
                  color=(89, 89, 89))
    # Body text: paragraph style and its character-style counterpart.
    _ensure_style(document, 'weekly_summary', para, regular, 12, False)
    _ensure_style(document, 'inside_para', char, regular, 12, False)
    # Highlight / entry summary text.
    _ensure_style(document, 'part3_style', para, regular, 11, False)
    # Section headings of the tracking chapter.
    _ensure_style(document, 'tech', para, heavy, 14, True,
                  base='Heading 1', color=black)
    _ensure_style(document, 'tech2', para, heavy, 14, True)
    # Sub-headings with a 25% grey highlight.
    _ensure_style(document, 'tech1', para, regular, 12, True,
                  highlight=WD_COLOR_INDEX.GRAY_25)
    # Entry "date | title" line and the technology line below it.
    _ensure_style(document, 'title_date', para, heavy, 12, False)
    _ensure_style(document, 'tech_detail', para, light, 10, False)
    # Expert comment heading.
    _ensure_style(document, 'expert', para, regular, 12, True)


def _add_paragraphs(document, items):
    """Append one paragraph per (text, style name) pair, in order."""
    for text, style in items:
        document.add_paragraph(text, style=style)


def _add_section_heading(document, heading):
    """Append the heading and the fixed focus/details scaffold of one section."""
    _add_paragraphs(document, [
        (heading, 'tech'),
        ("", 'tech2'),
        ("进展聚焦", 'tech1'),
        ("", 'tech1'),
        ("【占位】", 'weekly_summary'),
        ("", 'tech1'),
        ("进展详情", 'tech1'),
    ])


def _format_entry_date(value):
    """Return the entry date as YYYY-MM-DD from a 'YYYY.MM.DD' string or a datetime."""
    if isinstance(value, datetime):
        # Date-formatted spreadsheet cells arrive as datetime objects.
        return value.strftime('%Y-%m-%d')
    parsed = datetime.strptime(str(value).strip(), '%Y.%m.%d')
    return parsed.strftime('%Y-%m-%d')


def _fill_entry_cell(cell, date_text, title, tech, summary, url, link_text,
                     comment_heading):
    """Write one report entry (title line, technology, summary, link, comment) into a cell."""
    cell.text = ""
    cell.paragraphs[0].style = "title_date"
    cell.add_paragraph(str(date_text) + " | " + str(title), style="title_date")
    cell.add_paragraph("· " + str(tech), style="tech_detail")
    summary_paragraph = cell.add_paragraph(str(summary), style="part3_style")
    # Skip the link when the cell was empty (pandas reads it as NaN).
    if url is not None and not pd.isna(url):
        add_hyperlink(summary_paragraph, url, link_text)
    cell.add_paragraph("", style="part3_style")
    if comment_heading is not None:
        cell.add_paragraph(comment_heading, style="expert")
    # Style the paragraph that was just added rather than a fixed index, so
    # the trailing blank line is styled whether or not a comment precedes it.
    cell.add_paragraph("", style="expert")


def 荣耀周报排版(xlsx,template):
    """Lay out the weekly report as a Word document.

    Reads the entries from the spreadsheet, opens the template, defines the
    report styles, then appends the overview, the highlights chapter and the
    tracking chapter. Each technology section gets a one-column table with
    one row per entry (newest first); the final "定向追踪" section is filled
    with three placeholder sub-sections.

    :param xlsx: Path or file-like object accepted by ``pandas.read_excel``.
        The columns '领域', '时间', '标题', '涉及技术', '简述(摘要)', '源链接'
        and '是否点评' are read.
    :param template: Path or file-like object accepted by ``docx.Document``.
    :return: The populated python-docx document (not yet saved).
    :raises ValueError: If a '时间' value is neither a datetime nor a
        'YYYY.MM.DD' string.
    """
    document = Document(template)
    df = pd.read_excel(xlsx)
    res = df.sort_values(by='领域', ascending=True)
    sections = ["图像理解与生成", "计算光学", "图像处理", "机器学习前沿", "自然语言交互", "计算机视觉前沿","量子计算", "定向追踪"]

    _define_report_styles(document)

    titles = ['一、本期目录', '二、热点速览', '三、定向追踪']

    # Opening line: date range followed by the overview caption.
    para1 = document.add_paragraph("2023 年 x 月 x 日 —— 2023 年 x 月 x 日", style="date_range")
    para1.add_run(" ", style="inside_para")
    run = para1.add_run("【本期荣耀周报内容概览】", style="inside_para")
    run.font.bold = False

    _add_paragraphs(document, [
        ("", 'weekly_summary'),
        # Chapter 1: table of contents
        (titles[0], 'title'),
        ("", 'title2'),
        # Chapter 2: highlights
        (titles[1], 'title'),
        ("", 'weekly_summary'),
        ("【本周期热点总结】", 'weekly_summary'),
        ("", 'weekly_summary'),
        ("以下为本周期热点速览,以事件发生时间排序。", 'weekly_summary'),
        ("", 'tech_progress'),
        ("技术进展 · 【领域】 · 【涉及技术】", 'tech_progress'),
        ("【技术进展正文】", 'part3_style'),
        ("热点注释:", 'content'),
        ("查看详情:", 'content'),
        ("", 'content'),
        # Chapter 3: tracking
        (titles[2], 'title'),
        ("", 'title2'),
    ])

    for section in sections:
        if section != "定向追踪":
            _add_section_heading(document, "技术进展 · " + section)
            # Entries of this section, newest first. na=False keeps rows
            # with an empty category out of the mask instead of failing.
            in_section = res["领域"].str.contains(section, regex=False, na=False)
            res1 = res[in_section].sort_values(by="时间", ascending=False)
            # Size the table from the rows actually selected; a section
            # without entries gets no table at all.
            if len(res1) > 0:
                table = document.add_table(rows=len(res1), cols=1)
                for i, row in enumerate(table.rows):
                    record = res1.iloc[i]
                    commented = record["是否点评"] == "是"
                    _fill_entry_cell(
                        row.cells[0],
                        _format_entry_date(record["时间"]),
                        record["标题"],
                        record["涉及技术"],
                        record["简述(摘要)"],
                        record["源链接"],
                        "原文链接",
                        "专家点评" if commented else None,
                    )
            document.add_page_break()
        else:
            # Industry news: three placeholder sub-sections, each starting
            # on a new page after the first.
            for position, topic in enumerate(("产品发布", "大厂动态", "项目开源")):
                if position:
                    document.add_page_break()
                _add_section_heading(document, "业内动态 · " + topic)
                table = document.add_table(rows=1, cols=1)
                _fill_entry_cell(
                    table.rows[0].cells[0],
                    "yyyy-mm-dd",
                    "【标题占位】",
                    "【涉及技术】",
                    "【简述(摘要)占位】",
                    "【】",
                    "【原文链接占位】",
                    "【专家点评占位】",
                )
    return document
import pandas as pd
import docx
# Gradio 部分
import gradio as gr
import streamlit as st
from io import BytesIO
# def excel_to_docx(xlsx):
# # 处理 Excel 文件并生成 docx 文件
# document,name = 荣耀周报排版(xlsx)
# return document.getvalue()
# 定义 Gradio 的输入和输出界面
# inputs = gr.inputs.File(label="Excel 文件", type=["file"])
# outputs = gr.outputs.File(label="docx 文件")
# Streamlit page: upload the spreadsheet and the Word template, generate the
# report on demand and offer it for download.
st.title('Translator App')
st.markdown("Translate from Docx file")
st.subheader("File Upload")
datas=st.file_uploader("Original File")
template=st.file_uploader("template File")
name=st.text_input('Enter New File Name: ')
stream = BytesIO()
if st.button(label='生成'):
    if datas is None or template is None:
        # file_uploader returns None until a file has been chosen; stop here
        # instead of passing None into the report generator.
        st.error("Please upload both the original file and the template file first.")
    else:
        # st.spinner is a context manager: the spinner is only displayed
        # while the body of the with-block runs.
        with st.spinner('Waiting...'):
            document= 荣耀周报排版(datas,template)
            # Serialise the document into the in-memory buffer.
            document.save(stream)
        st.success("Translated")
        st.download_button(label='Download Translated File',file_name=(f"{name}.docx"), data=stream.getvalue())