Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,9 @@ import PyPDF2
|
|
12 |
|
13 |
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
|
14 |
|
|
|
|
|
|
|
15 |
app.layout = dbc.Container([
|
16 |
html.H1("Auto-Wiki", className="my-4"),
|
17 |
dcc.Upload(
|
@@ -37,7 +40,8 @@ app.layout = dbc.Container([
|
|
37 |
dbc.Progress(id="upload-progress", label="Upload Progress", style={"visibility": "hidden"}),
|
38 |
dbc.Progress(id="conversion-progress", label="Conversion Progress", style={"visibility": "hidden"}),
|
39 |
dbc.Button("Convert and Download", id="convert-button", color="primary", className="mt-3", disabled=True),
|
40 |
-
dcc.Download(id="download-zip")
|
|
|
41 |
])
|
42 |
|
43 |
def process_docx(contents, filename):
|
@@ -59,6 +63,28 @@ def process_pdf(contents, filename):
|
|
59 |
full_text.append(page.extract_text())
|
60 |
return '\n\n'.join(full_text)
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
@app.callback(
|
63 |
[Output('upload-output', 'children'),
|
64 |
Output('convert-button', 'disabled'),
|
@@ -69,11 +95,13 @@ def process_pdf(contents, filename):
|
|
69 |
Output('download-zip', 'data')],
|
70 |
[Input('upload-data', 'contents'),
|
71 |
Input('upload-data', 'filename'),
|
72 |
-
Input('convert-button', 'n_clicks')
|
|
|
73 |
[State('upload-data', 'contents'),
|
74 |
State('upload-data', 'filename')]
|
75 |
)
|
76 |
-
def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames):
|
|
|
77 |
ctx = callback_context
|
78 |
if not ctx.triggered:
|
79 |
return no_update
|
@@ -96,31 +124,17 @@ def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames
|
|
96 |
if not contents:
|
97 |
return no_update
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
for i, (c, n) in enumerate(zip(contents, filenames)):
|
102 |
-
if n.lower().endswith('.docx'):
|
103 |
-
text = process_docx(c, n)
|
104 |
-
elif n.lower().endswith('.pdf'):
|
105 |
-
text = process_pdf(c, n)
|
106 |
-
else:
|
107 |
-
continue # Skip unsupported file types
|
108 |
-
md = markdown.markdown(text)
|
109 |
-
processed_files.append((n.replace('.docx', '.md').replace('.pdf', '.md'), md))
|
110 |
-
time.sleep(0.1) # Simulate processing time
|
111 |
-
app.callback_context.response.set_data(f'{{"progress": {(i+1)/len(contents)*100}}}')
|
112 |
-
|
113 |
-
zip_buffer = io.BytesIO()
|
114 |
-
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
115 |
-
for name, content in processed_files:
|
116 |
-
zip_file.writestr(name, content)
|
117 |
-
|
118 |
-
return zip_buffer.getvalue()
|
119 |
-
|
120 |
-
thread = threading.Thread(target=process_files)
|
121 |
thread.start()
|
122 |
|
123 |
-
return no_update, True, 100, {"visibility": "visible"}, 0, {"visibility": "visible"},
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
return no_update
|
126 |
|
|
|
12 |
|
13 |
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
|
14 |
|
15 |
+
# Conversion progress as a percentage (0-100); written by process_files and polled by update_output on each interval tick
|
16 |
+
conversion_progress = 0
|
17 |
+
|
18 |
app.layout = dbc.Container([
|
19 |
html.H1("Auto-Wiki", className="my-4"),
|
20 |
dcc.Upload(
|
|
|
40 |
dbc.Progress(id="upload-progress", label="Upload Progress", style={"visibility": "hidden"}),
|
41 |
dbc.Progress(id="conversion-progress", label="Conversion Progress", style={"visibility": "hidden"}),
|
42 |
dbc.Button("Convert and Download", id="convert-button", color="primary", className="mt-3", disabled=True),
|
43 |
+
dcc.Download(id="download-zip"),
|
44 |
+
dcc.Interval(id='interval-component', interval=500, n_intervals=0)
|
45 |
])
|
46 |
|
47 |
def process_docx(contents, filename):
|
|
|
63 |
full_text.append(page.extract_text())
|
64 |
return '\n\n'.join(full_text)
|
65 |
|
66 |
+
def process_files(contents, filenames):
    """Convert uploaded .docx/.pdf files and bundle the results into a ZIP.

    Each (content, filename) pair is dispatched on its file extension
    (case-insensitive) to ``process_docx`` or ``process_pdf``; the extracted
    text is passed through ``markdown.markdown`` and stored in the archive
    under the original name with its extension swapped for ``.md``.
    Files with any other extension are skipped.

    The module-level ``conversion_progress`` is updated after every file,
    including skipped ones, and is set to 100 once the archive is complete,
    so a poller waiting for 100 is not left hanging when unsupported files
    are present.

    Args:
        contents: Sequence of uploaded file payloads, one per file.
            # NOTE(review): presumably Dash upload data-URL strings; confirm
            # against process_docx / process_pdf.
        filenames: Sequence of file names, parallel to ``contents``.

    Returns:
        bytes: The ZIP archive (DEFLATE-compressed) containing one ``.md``
        entry per converted file; an empty archive if nothing was converted.
    """
    global conversion_progress

    total = len(contents)
    processed_files = []

    for done, (content, name) in enumerate(zip(contents, filenames), start=1):
        lowered = name.lower()
        if lowered.endswith('.docx'):
            suffix = '.docx'
            text = process_docx(content, name)
        elif lowered.endswith('.pdf'):
            suffix = '.pdf'
            text = process_pdf(content, name)
        else:
            suffix = None  # Unsupported file type: skipped, but still counted below

        if suffix is not None:
            # NOTE(review): markdown.markdown() renders Markdown to HTML, yet
            # the result is stored under a ".md" name -- confirm this is intended.
            md = markdown.markdown(text)
            # Strip the matched extension by length so the rename is
            # case-insensitive and only ever touches the end of the name.
            processed_files.append((name[:-len(suffix)] + '.md', md))
            time.sleep(0.1)  # Simulate processing time

        # Count every file (converted or skipped) so progress can reach 100.
        conversion_progress = done / total * 100

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for entry_name, entry_body in processed_files:
            zip_file.writestr(entry_name, entry_body)

    # Guarantee the terminal state even if the loop ran fewer times than
    # len(contents) (e.g. the two input sequences differ in length).
    conversion_progress = 100

    return zip_buffer.getvalue()
|
87 |
+
|
88 |
@app.callback(
|
89 |
[Output('upload-output', 'children'),
|
90 |
Output('convert-button', 'disabled'),
|
|
|
95 |
Output('download-zip', 'data')],
|
96 |
[Input('upload-data', 'contents'),
|
97 |
Input('upload-data', 'filename'),
|
98 |
+
Input('convert-button', 'n_clicks'),
|
99 |
+
Input('interval-component', 'n_intervals')],
|
100 |
[State('upload-data', 'contents'),
|
101 |
State('upload-data', 'filename')]
|
102 |
)
|
103 |
+
def update_output(list_of_contents, list_of_names, n_clicks, n_intervals, contents, filenames):
|
104 |
+
global conversion_progress
|
105 |
ctx = callback_context
|
106 |
if not ctx.triggered:
|
107 |
return no_update
|
|
|
124 |
if not contents:
|
125 |
return no_update
|
126 |
|
127 |
+
conversion_progress = 0
|
128 |
+
thread = threading.Thread(target=process_files, args=(contents, filenames))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
thread.start()
|
130 |
|
131 |
+
return no_update, True, 100, {"visibility": "visible"}, 0, {"visibility": "visible"}, None
|
132 |
+
|
133 |
+
if ctx.triggered[0]['prop_id'] == 'interval-component.n_intervals':
|
134 |
+
if conversion_progress == 100:
|
135 |
+
return no_update, False, 100, {"visibility": "visible"}, 100, {"visibility": "visible"}, dcc.send_bytes(process_files(contents, filenames), "converted_files.zip")
|
136 |
+
else:
|
137 |
+
return no_update, True, 100, {"visibility": "visible"}, conversion_progress, {"visibility": "visible"}, None
|
138 |
|
139 |
return no_update
|
140 |
|