# NOTE: This source was captured from a web file viewer ("Spaces: Paused";
# reported file size: 4,395 bytes). The viewer's per-line commit-hash column
# and line-number gutter (1-136) were page residue rather than program text
# and are summarised here instead of being kept verbatim.
import base64
import io
import os
import tempfile
import threading
import time
import zipfile

import dash_bootstrap_components as dbc
import pypandoc
from dash import Dash, dcc, html, Input, Output, State, callback_context, no_update
# Check if pandoc is installed. Every conversion in this file goes through
# pypandoc, so the check runs once at import time, before the app is built.
try:
    pypandoc.get_pandoc_version()
except OSError:
    # If not found, attempt to download and install
    print("Pandoc not found. Attempting to download...")
    pypandoc.download_pandoc()
    # Verify installation
    try:
        pypandoc.get_pandoc_version()
    except OSError:
        print("Failed to install Pandoc. Please install it manually.")
        # Raise SystemExit directly: the bare ``exit`` helper is injected by
        # the ``site`` module for interactive use and is not guaranteed to
        # exist in every interpreter start-up mode.
        raise SystemExit(1) from None
    else:
        print("Pandoc successfully installed.")
# Dash application instance, styled with the Bootstrap theme shipped by
# dash-bootstrap-components.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Inline CSS for the dashed drag-and-drop upload box.
_UPLOAD_BOX_STYLE = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

# Upload widget: takes several files at once, filtered to .docx / .pdf.
_upload_area = dcc.Upload(
    id='upload-data',
    children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
    style=_UPLOAD_BOX_STYLE,
    multiple=True,
    accept='.docx,.pdf',
)

# Conversion trigger; rendered disabled initially (disabled=True).
_convert_button = dbc.Button(
    "Convert and Download",
    id="convert-button",
    color="primary",
    className="mt-3",
    disabled=True,
)

# Page layout, top to bottom: title, upload box, list of uploaded files, two
# initially hidden status lines, the convert button and the download target.
app.layout = dbc.Container([
    html.H1("Auto-Wiki", className="my-4"),
    _upload_area,
    html.Div(id='upload-output'),
    html.Div(id="upload-status", style={"display": "none"}),
    html.Div(id="conversion-status", style={"display": "none"}),
    _convert_button,
    dcc.Download(id="download-zip"),
])
def process_file(contents, filename):
    """Convert one uploaded document to Markdown text.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``;
            everything after the first comma is base64-decoded.
        filename: Client-supplied file name. Only its extension is used, as
            the suffix of the temporary file handed to pandoc.

    Returns:
        Whatever ``pypandoc.convert_file(path, 'md')`` returns for the
        decoded document.

    Raises:
        ValueError: If ``contents`` has no comma separator or the payload is
            not valid base64.
        Exception: Anything raised by ``pypandoc.convert_file`` propagates
            unchanged; the temporary file is removed either way.
    """
    _header, separator, payload = contents.partition(',')
    if not separator:
        raise ValueError("Upload contents are not in '<header>,<base64>' form")
    decoded = base64.b64decode(payload)

    # Never write to a path derived from the client-supplied name: it could
    # point outside the working directory or clash with another upload. Only
    # the extension is kept, since no explicit input format is passed to
    # pypandoc below and it can still infer one from the path.
    suffix = os.path.splitext(filename)[1]
    descriptor, temp_path = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(descriptor, 'wb') as temp_file:
            temp_file.write(decoded)
        return pypandoc.convert_file(temp_path, 'md')
    finally:
        # Clean up the temporary file even when the conversion fails.
        os.remove(temp_path)
def _unique_md_name(filename, used_names):
    """Return an unused ``<stem>.md`` archive name for *filename* and record it."""
    # Drop any directory part so every entry lands at the archive root.
    base_name = filename.replace('\\', '/').rsplit('/', 1)[-1]
    stem = base_name.rsplit('.', 1)[0]
    candidate = f"{stem}.md"
    counter = 1
    # "report.docx" and "report.pdf" would otherwise both become "report.md".
    while candidate in used_names:
        counter += 1
        candidate = f"{stem}_{counter}.md"
    used_names.add(candidate)
    return candidate


def process_files(contents, filenames):
    """Convert every supported upload and bundle the results into a ZIP.

    Args:
        contents: Iterable of upload payloads, paired positionally with
            ``filenames``.
        filenames: Iterable of client-supplied file names. Entries that do
            not end in ``.docx`` or ``.pdf`` (case-insensitive) are skipped.

    Returns:
        The bytes of a deflate-compressed ZIP archive holding one ``.md``
        entry per converted file. The archive is empty when nothing matched.

    Raises:
        Exception: Anything raised by ``process_file`` propagates; no archive
            is returned in that case.
    """
    zip_buffer = io.BytesIO()
    used_names = set()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for content, filename in zip(contents, filenames):
            if not filename.lower().endswith(('.docx', '.pdf')):
                continue
            markdown = process_file(content, filename)
            zip_file.writestr(_unique_md_name(filename, used_names), markdown)
    return zip_buffer.getvalue()
@app.callback(
    [Output('upload-output', 'children'),
     Output('convert-button', 'disabled'),
     Output('upload-status', 'children'),
     Output('upload-status', 'style'),
     Output('conversion-status', 'children'),
     Output('conversion-status', 'style'),
     Output('download-zip', 'data')],
    [Input('upload-data', 'contents'),
     Input('upload-data', 'filename'),
     Input('convert-button', 'n_clicks')],
    [State('upload-data', 'contents'),
     State('upload-data', 'filename')]
)
def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames):
    """Handle both new uploads and clicks on the convert button.

    Args:
        list_of_contents: ``upload-data.contents`` as a callback input.
        list_of_names: ``upload-data.filename`` as a callback input.
        n_clicks: Click count of ``convert-button``.
        contents: ``upload-data.contents`` again, read as state.
        filenames: ``upload-data.filename`` again, read as state.

    Returns:
        ``no_update`` when there is nothing to do, otherwise a 7-tuple in the
        order of the declared outputs: uploaded-file list, button ``disabled``
        flag, upload status text and style, conversion status text and style,
        and the download payload.
    """
    ctx = callback_context
    if not ctx.triggered:
        return no_update

    # An upload changes `contents` and `filename` together, so look at every
    # triggered property instead of trusting the order of the entries.
    triggered_props = {entry['prop_id'] for entry in ctx.triggered}
    hidden = {"display": "none"}
    visible = {"display": "block"}

    if 'convert-button.n_clicks' in triggered_props:
        if n_clicks is None or not contents:
            return no_update
        try:
            zip_data = process_files(contents, filenames)
        except (RuntimeError, OSError, ValueError) as exc:
            # Report the failure in the page instead of letting the callback
            # error out with no feedback; the button state is left untouched.
            print(f"Conversion failed: {exc}")
            return (
                no_update,
                no_update,
                "",
                hidden,
                f"Conversion failed: {exc}",
                visible,
                no_update,
            )
        # These outputs reach the browser only after the conversion has
        # finished, so the status reports completion rather than progress.
        return (
            no_update,
            True,
            "",
            hidden,
            "Conversion complete. Your download should start shortly.",
            visible,
            dcc.send_bytes(zip_data, "converted_files.zip"),
        )

    if triggered_props & {'upload-data.contents', 'upload-data.filename'}:
        if list_of_contents is None or list_of_names is None:
            return no_update
        children = [
            html.Div([
                html.H5(f"File uploaded: {name}"),
                html.Hr()
            ]) for name in list_of_names
        ]
        return (
            children,
            False,
            "Files uploaded successfully",
            visible,
            "",
            hidden,
            None,
        )

    return no_update
if __name__ == '__main__':
    # Script entry point: start the Dash server when the file is run directly.
    print("Starting the Dash application...")
    # Listen on every interface (0.0.0.0), port 7860, with debug mode off.
    app.run(debug=False, host='0.0.0.0', port=7860)
    # Printed only once app.run() has returned.
    print("Dash application has finished running.")