bluenevus committed on
Commit
49e035d
·
verified ·
1 Parent(s): 030e6f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -26
app.py CHANGED
@@ -12,6 +12,9 @@ import PyPDF2
12
 
13
  app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
14
 
 
 
 
15
  app.layout = dbc.Container([
16
  html.H1("Auto-Wiki", className="my-4"),
17
  dcc.Upload(
@@ -37,7 +40,8 @@ app.layout = dbc.Container([
37
  dbc.Progress(id="upload-progress", label="Upload Progress", style={"visibility": "hidden"}),
38
  dbc.Progress(id="conversion-progress", label="Conversion Progress", style={"visibility": "hidden"}),
39
  dbc.Button("Convert and Download", id="convert-button", color="primary", className="mt-3", disabled=True),
40
- dcc.Download(id="download-zip")
 
41
  ])
42
 
43
  def process_docx(contents, filename):
@@ -59,6 +63,28 @@ def process_pdf(contents, filename):
59
  full_text.append(page.extract_text())
60
  return '\n\n'.join(full_text)
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  @app.callback(
63
  [Output('upload-output', 'children'),
64
  Output('convert-button', 'disabled'),
@@ -69,11 +95,13 @@ def process_pdf(contents, filename):
69
  Output('download-zip', 'data')],
70
  [Input('upload-data', 'contents'),
71
  Input('upload-data', 'filename'),
72
- Input('convert-button', 'n_clicks')],
 
73
  [State('upload-data', 'contents'),
74
  State('upload-data', 'filename')]
75
  )
76
- def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames):
 
77
  ctx = callback_context
78
  if not ctx.triggered:
79
  return no_update
@@ -96,31 +124,17 @@ def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames
96
  if not contents:
97
  return no_update
98
 
99
- def process_files():
100
- processed_files = []
101
- for i, (c, n) in enumerate(zip(contents, filenames)):
102
- if n.lower().endswith('.docx'):
103
- text = process_docx(c, n)
104
- elif n.lower().endswith('.pdf'):
105
- text = process_pdf(c, n)
106
- else:
107
- continue # Skip unsupported file types
108
- md = markdown.markdown(text)
109
- processed_files.append((n.replace('.docx', '.md').replace('.pdf', '.md'), md))
110
- time.sleep(0.1) # Simulate processing time
111
- app.callback_context.response.set_data(f'{{"progress": {(i+1)/len(contents)*100}}}')
112
-
113
- zip_buffer = io.BytesIO()
114
- with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
115
- for name, content in processed_files:
116
- zip_file.writestr(name, content)
117
-
118
- return zip_buffer.getvalue()
119
-
120
- thread = threading.Thread(target=process_files)
121
  thread.start()
122
 
123
- return no_update, True, 100, {"visibility": "visible"}, 0, {"visibility": "visible"}, dcc.send_bytes(process_files(), "converted_files.zip")
 
 
 
 
 
 
124
 
125
  return no_update
126
 
 
12
 
13
  app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
14
 
15
+ # Global variable to track conversion progress
16
+ conversion_progress = 0
17
+
18
  app.layout = dbc.Container([
19
  html.H1("Auto-Wiki", className="my-4"),
20
  dcc.Upload(
 
40
  dbc.Progress(id="upload-progress", label="Upload Progress", style={"visibility": "hidden"}),
41
  dbc.Progress(id="conversion-progress", label="Conversion Progress", style={"visibility": "hidden"}),
42
  dbc.Button("Convert and Download", id="convert-button", color="primary", className="mt-3", disabled=True),
43
+ dcc.Download(id="download-zip"),
44
+ dcc.Interval(id='interval-component', interval=500, n_intervals=0)
45
  ])
46
 
47
  def process_docx(contents, filename):
 
63
  full_text.append(page.extract_text())
64
  return '\n\n'.join(full_text)
65
 
66
def process_files(contents, filenames):
    """Convert the uploaded documents to Markdown files and zip them.

    Each ``(content, filename)`` pair is dispatched on its file extension
    (case-insensitively): ``.docx`` goes to ``process_docx`` and ``.pdf`` to
    ``process_pdf``; any other extension is skipped. The extracted text is
    passed through ``markdown.markdown`` and stored in the archive under the
    original name with its trailing extension replaced by ``.md``.

    The module-level ``conversion_progress`` is updated after every input
    pair, including skipped ones, so it reaches 100 once the last pair has
    been handled. With no input pairs it is left untouched.

    Args:
        contents: Iterable of uploaded file payloads, forwarded unchanged to
            ``process_docx`` / ``process_pdf``.
        filenames: Iterable of file names, paired with ``contents`` by
            position.

    Returns:
        bytes: A DEFLATE-compressed ZIP archive with one ``.md`` entry per
        converted file (an empty archive if nothing was converted).
    """
    global conversion_progress

    # Materialise the pairs so the progress denominator matches what the loop
    # actually visits, even if the two inputs differ in length.
    pairs = list(zip(contents, filenames))
    total = len(pairs)

    processed_files = []
    for i, (c, n) in enumerate(pairs):
        lowered = n.lower()
        if lowered.endswith('.docx'):
            converter = process_docx
        elif lowered.endswith('.pdf'):
            converter = process_pdf
        else:
            converter = None  # Unsupported file type: nothing to convert

        if converter is not None:
            md = markdown.markdown(converter(c, n))
            # Swap only the trailing extension, whatever its letter case, so
            # "REPORT.DOCX" becomes "REPORT.md" and a ".pdf" in the middle of
            # a name is left alone.
            processed_files.append((n.rsplit('.', 1)[0] + '.md', md))
            time.sleep(0.1)  # Simulate processing time

        # Count skipped files too; otherwise a trailing unsupported file
        # would leave the progress short of 100 and pollers would wait forever.
        conversion_progress = (i + 1) / total * 100

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for name, content in processed_files:
            zip_file.writestr(name, content)

    return zip_buffer.getvalue()
87
+
88
  @app.callback(
89
  [Output('upload-output', 'children'),
90
  Output('convert-button', 'disabled'),
 
95
  Output('download-zip', 'data')],
96
  [Input('upload-data', 'contents'),
97
  Input('upload-data', 'filename'),
98
+ Input('convert-button', 'n_clicks'),
99
+ Input('interval-component', 'n_intervals')],
100
  [State('upload-data', 'contents'),
101
  State('upload-data', 'filename')]
102
  )
103
+ def update_output(list_of_contents, list_of_names, n_clicks, n_intervals, contents, filenames):
104
+ global conversion_progress
105
  ctx = callback_context
106
  if not ctx.triggered:
107
  return no_update
 
124
  if not contents:
125
  return no_update
126
 
127
+ conversion_progress = 0
128
+ thread = threading.Thread(target=process_files, args=(contents, filenames))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  thread.start()
130
 
131
+ return no_update, True, 100, {"visibility": "visible"}, 0, {"visibility": "visible"}, None
132
+
133
+ if ctx.triggered[0]['prop_id'] == 'interval-component.n_intervals':
134
+ if conversion_progress == 100:
135
+ return no_update, False, 100, {"visibility": "visible"}, 100, {"visibility": "visible"}, dcc.send_bytes(process_files(contents, filenames), "converted_files.zip")
136
+ else:
137
+ return no_update, True, 100, {"visibility": "visible"}, conversion_progress, {"visibility": "visible"}, None
138
 
139
  return no_update
140