<!doctype html>
<html lang="en"> | |
<head> | |
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="theme-color" content="#000000">
<meta name="description" content="Convert documents to clean Markdown format">
<title>Basic Document Converter | PDF, EPUB, DOCX &amp; PPTX to Markdown</title>
<link rel="manifest" href="/manifest.webmanifest">
<!-- Third-party libraries. Loaded synchronously on purpose: the inline script at the end of the body uses pdfjsLib as soon as it runs. -->
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js"></script>
<!-- The package version in this URL had been replaced by an e-mail obfuscation placeholder, which broke the request; pinned to a published JSZip release. -->
<script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.0/mammoth.browser.min.js"></script>
<!-- NOTE(review): the version segment of the next URL was mangled the same way; the intended pptx2md release cannot be recovered from this file. Restore it before shipping. -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/pptx2md.min.js"></script>
<script src="https://kit.fontawesome.com/a076d05399.js" crossorigin="anonymous"></script>
<style>
  /* Space Grotesk is the UI face; Space Mono is requested by #markdownOutput below
     and was previously never loaded, so the output silently fell back to the
     generic monospace font. */
  @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&family=Space+Mono&display=swap');

  body {
    font-family: 'Space Grotesk', sans-serif;
    background-color: #F5F5F5;
  }

  /* Border variants */
  .neo-border {
    border: 3px solid #000;
    box-shadow: 8px 8px 0 #000;
  }
  .neo-border-thick {
    border: 4px solid #000;
  }
  .neo-border-thin {
    border: 2px solid #000;
  }
  .neo-tab-active {
    border-bottom: 4px solid #000;
    font-weight: 700;
  }

  /* Progress bar */
  .neo-progress {
    background-color: #E0E0E0;
    border: 2px solid #000;
  }
  .neo-progress-bar {
    background-color: #000;
  }

  /* Drop target; the "active" class is toggled by the drag handlers */
  .dropzone {
    border: 3px dashed #000;
    transition: all 0.2s;
  }
  .dropzone.active {
    background-color: #FFF0F0;
  }

  /* Preview and output panes */
  .document-preview-container {
    display: grid;
    grid-template-columns: 1fr;
    gap: 1rem;
    height: 500px;
  }
  .document-preview {
    border: 3px solid #000;
    background-color: white;
    overflow-y: auto;
    height: 100%;
  }
  .page-canvas {
    border: 2px solid #000;
    margin-bottom: 1rem;
    max-width: 100%;
  }

  /* Buttons */
  .neo-btn {
    border: 3px solid #000;
    font-weight: 700;
    letter-spacing: -0.5px;
    transition: all 0.2s;
  }
  .neo-btn:hover {
    transform: translate(-2px, -2px);
    box-shadow: 4px 4px 0 #000;
  }
  .neo-btn:active {
    transform: translate(0, 0);
    box-shadow: none;
  }
  .neo-btn-primary {
    background-color: #000;
    color: white;
  }
  .neo-btn-secondary {
    background-color: white;
    color: black;
  }

  /* Custom checkbox: native rendering is disabled and the check mark is an inline SVG */
  .neo-checkbox {
    -webkit-appearance: none;
    -moz-appearance: none;
    appearance: none;
    width: 20px;
    height: 20px;
    border: 3px solid #000;
    margin-right: 8px;
    position: relative;
    top: 4px;
  }
  .neo-checkbox:checked {
    background-color: #000;
    background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='white' viewBox='0 0 16 16'%3E%3Cpath d='M12.736 3.97a.733.733 0 0 1 1.047 0c.286.289.29.756.01 1.05L7.88 12.01a.733.733 0 0 1-1.065.02L3.217 8.384a.757.757 0 0 1 0-1.06.733.733 0 0 1 1.047 0l3.052 3.093 5.4-6.425a.247.247 0 0 1 .02-.022Z'/%3E%3C/svg%3E");
    background-repeat: no-repeat;
    background-position: center;
  }

  /* Markdown output; pre-wrap keeps the line breaks of the generated text */
  #markdownOutput {
    font-family: 'Space Mono', monospace;
    white-space: pre-wrap;
    background-color: white;
    border: 3px solid #000;
    padding: 1rem;
    height: 100%;
    overflow-y: auto;
  }

  /* Floating install button */
  #installBtn {
    position: fixed;
    bottom: 1rem;
    right: 1rem;
    z-index: 100;
  }

  /* Preview cards built by the EPUB, DOCX and PPTX converters */
  .preview-slide {
    width: 100%;
    background-color: white;
    padding: 1rem;
    border: 2px solid #000;
    margin-bottom: 1rem;
  }

  /* Selected-file summary */
  .file-info {
    display: flex;
    align-items: center;
    gap: 0.5rem;
    margin-bottom: 1rem;
  }
  .file-icon {
    font-size: 1.5rem;
  }

  /* Small screens */
  @media (max-width: 768px) {
    .container {
      padding: 1rem;
    }
    .neo-border {
      box-shadow: 4px 4px 0 #000;
    }
    .document-preview-container {
      grid-template-columns: 1fr;
      height: auto;
    }
    .document-preview {
      height: 300px;
    }
  }
</style>
</head> | |
<body class="min-h-screen"> | |
<!-- Primary page content. Element ids are referenced by the application script and must stay unchanged. -->
<main class="container mx-auto px-4 py-12 max-w-6xl">
  <div class="text-center mb-12">
    <h1 class="text-4xl font-bold mb-4 tracking-tight">BASIC DOCUMENT CONVERTER</h1>
    <p class="text-xl">TRANSFORM PDF, EPUB, DOCX &amp; PPTX TO CLEAN MARKDOWN</p>
  </div>
  <div class="neo-border bg-white mb-8">
    <div class="p-8">
      <div class="grid grid-cols-1 lg:grid-cols-2 gap-8">
        <!-- Upload Section -->
        <div>
          <div id="dropzone" class="dropzone rounded-none p-12 text-center cursor-pointer mb-8">
            <div id="uploadContent" class="flex flex-col items-center justify-center">
              <i class="fas fa-file-upload text-5xl mb-4" aria-hidden="true"></i>
              <h2 class="text-xl font-bold mb-2">DRAG &amp; DROP ANY DOCUMENT HERE</h2>
              <p class="mb-6">SUPPORTS PDF, EPUB, DOCX &amp; PPTX</p>
              <input type="file" id="fileInput" accept=".pdf,.epub,.docx,.pptx" class="hidden">
              <button type="button" id="browseBtn" class="neo-btn neo-btn-primary px-6 py-3">
                SELECT DOCUMENT
              </button>
            </div>
          </div>
          <!-- File Info -->
          <div id="fileInfoContainer" class="neo-border-thin p-4 mb-4 bg-white hidden">
            <div class="file-info">
              <i id="fileIcon" class="file-icon" aria-hidden="true"></i>
              <div>
                <h2 id="fileName" class="font-bold"></h2>
                <p id="fileType" class="text-sm"></p>
              </div>
            </div>
          </div>
          <!-- Options -->
          <div class="neo-border-thin p-6 mb-8 bg-white">
            <h2 class="font-bold text-lg mb-4">CONVERSION OPTIONS</h2>
            <div class="space-y-4">
              <div class="flex items-start">
                <input type="checkbox" id="preserveLayout" class="neo-checkbox" checked>
                <label for="preserveLayout" class="text-base">PRESERVE LAYOUT STRUCTURE</label>
              </div>
              <div class="flex items-start">
                <input type="checkbox" id="detectHeadings" class="neo-checkbox" checked>
                <label for="detectHeadings" class="text-base">AUTO-DETECT HEADINGS</label>
              </div>
              <div class="flex items-start">
                <input type="checkbox" id="includeMetadata" class="neo-checkbox" checked>
                <label for="includeMetadata" class="text-base">INCLUDE DOCUMENT METADATA</label>
              </div>
            </div>
          </div>
          <!-- Progress -->
          <div id="progressContainer" class="hidden">
            <div class="flex justify-between mb-2">
              <span class="font-bold">CONVERSION PROGRESS</span>
              <span id="progressPercent" class="font-bold">0%</span>
            </div>
            <div class="neo-progress w-full h-3 mb-2">
              <div id="progressBar" class="neo-progress-bar h-full" style="width: 0%"></div>
            </div>
            <!-- Live region: the script rewrites this text as the conversion advances -->
            <p id="progressText" class="font-medium" role="status" aria-live="polite">PROCESSING DOCUMENT...</p>
          </div>
        </div>
        <!-- Preview Section -->
        <div>
          <div class="flex justify-between items-center mb-4">
            <h2 class="font-bold text-lg">MARKDOWN OUTPUT</h2>
            <div class="flex space-x-3">
              <button type="button" id="copyBtn" class="neo-btn neo-btn-secondary px-4 py-2 text-sm hidden">
                <i class="fas fa-copy mr-1" aria-hidden="true"></i> COPY
              </button>
              <button type="button" id="downloadBtn" class="neo-btn neo-btn-primary px-4 py-2 text-sm hidden">
                <i class="fas fa-download mr-1" aria-hidden="true"></i> DOWNLOAD
              </button>
            </div>
          </div>
          <div class="document-preview-container">
            <div id="documentPreview" class="document-preview p-4">
              <div id="previewContent" class="flex flex-col items-center"></div>
            </div>
            <div id="markdownOutput" class="document-preview p-4"></div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <div class="text-center font-medium">
    <p>THIS TOOL WORKS ENTIRELY IN YOUR BROWSER. YOUR FILES ARE NEVER UPLOADED TO ANY SERVER.</p>
  </div>
</main>
<!-- Install button (hidden by default) --> | |
<button type="button" id="installBtn" class="neo-btn neo-btn-primary px-6 py-3 hidden">
  <i class="fas fa-download mr-2" aria-hidden="true"></i> INSTALL APP
</button>
<!-- Application script: PDF.js setup, PWA install prompt, service worker registration, manifest, and document converters -->
<script> | |
// Point PDF.js at its worker script (same 2.12.313 release as the pdf.min.js loaded in <head>).
// Guarded: an unguarded reference would throw a ReferenceError when the CDN load of
// PDF.js fails, aborting this whole script and disabling every other converter too.
if (typeof pdfjsLib !== 'undefined') {
  pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.worker.min.js';
} else {
  console.error('PDF.js failed to load; PDF conversion is unavailable.');
}
// PWA installation
// Holds the intercepted `beforeinstallprompt` event until the user clicks INSTALL APP.
let deferredPrompt;
const installBtn = document.getElementById('installBtn');

// Suppress the browser's own prompt and reveal the custom install button instead.
window.addEventListener('beforeinstallprompt', (e) => {
  e.preventDefault();
  deferredPrompt = e;
  installBtn.classList.remove('hidden');
});

installBtn.addEventListener('click', async () => {
  if (!deferredPrompt) return;
  // Take ownership of the saved event up front so a failing prompt() cannot
  // leave a stale event and a visible button behind.
  const promptEvent = deferredPrompt;
  deferredPrompt = null;
  installBtn.classList.add('hidden');
  try {
    promptEvent.prompt();
    await promptEvent.userChoice;
  } catch (err) {
    console.error('Install prompt failed:', err);
  }
});

window.addEventListener('appinstalled', () => {
  installBtn.classList.add('hidden');
  deferredPrompt = null;
});

// Check if the app is running as a PWA
if (window.matchMedia('(display-mode: standalone)').matches || window.navigator.standalone) {
  console.log('Running as PWA');
}
// Service worker: once the page has finished loading, register /sw.js and log the outcome.
if ('serviceWorker' in navigator) {
  window.addEventListener('load', async () => {
    try {
      await navigator.serviceWorker.register('/sw.js');
      console.log('ServiceWorker registration successful');
    } catch (registrationError) {
      console.log('ServiceWorker registration failed: ', registrationError);
    }
  });
}
// Fallback web app manifest generated at runtime.
// The manifest is served from a blob: URL, which relative paths cannot be resolved
// against, so start_url and every icon path are made absolute against the page URL.
const toAbsoluteUrl = (path) => new URL(path, window.location.href).href;
const manifest = {
  "name": "Universal Document Converter",
  "short_name": "DocConvert",
  "description": "Convert documents to clean Markdown format",
  "start_url": toAbsoluteUrl('/'),
  "display": "standalone",
  "background_color": "#F5F5F5",
  "theme_color": "#000000",
  "icons": [
    {
      "src": toAbsoluteUrl('icon-192x192.png'),
      "sizes": "192x192",
      "type": "image/png"
    },
    {
      "src": toAbsoluteUrl('icon-512x512.png'),
      "sizes": "512x512",
      "type": "image/png"
    },
    {
      "src": toAbsoluteUrl('icon-maskable-192x192.png'),
      "sizes": "192x192",
      "type": "image/png",
      "purpose": "maskable"
    },
    {
      "src": toAbsoluteUrl('icon-maskable-512x512.png'),
      "sizes": "512x512",
      "type": "image/png",
      "purpose": "maskable"
    }
  ]
};
// Browsers use only the first <link rel="manifest"> in the document, so the generated
// manifest is injected only when the page does not already declare one; otherwise it
// would be a dead duplicate.
if (!document.querySelector('link[rel~="manifest"]')) {
  const manifestBlob = new Blob([JSON.stringify(manifest)], { type: 'application/json' });
  const manifestUrl = URL.createObjectURL(manifestBlob);
  const manifestLink = document.createElement('link');
  manifestLink.rel = 'manifest';
  manifestLink.href = manifestUrl;
  document.head.appendChild(manifestLink);
}
// Main application code | |
document.addEventListener('DOMContentLoaded', function() { | |
// Element handles, looked up once by id
const byId = (id) => document.getElementById(id);
const fileInput = byId('fileInput'),
  browseBtn = byId('browseBtn'),
  dropzone = byId('dropzone'),
  markdownOutput = byId('markdownOutput'),
  copyBtn = byId('copyBtn'),
  downloadBtn = byId('downloadBtn'),
  progressContainer = byId('progressContainer'),
  progressBar = byId('progressBar'),
  progressPercent = byId('progressPercent'),
  progressText = byId('progressText'),
  documentPreview = byId('documentPreview'),
  previewContent = byId('previewContent'),
  fileInfoContainer = byId('fileInfoContainer'),
  fileName = byId('fileName'),
  fileType = byId('fileType'),
  fileIcon = byId('fileIcon');

// Conversion state shared by the handlers and converters below
let currentMarkdown = '',
  currentFilename = '',
  currentFileType = '';
// Preview nodes collected per format; all four are emptied when a new file is chosen
let pdfPages = [],
  epubItems = [],
  docxPages = [],
  pptxSlides = [];
// Drag-and-drop wiring for the dropzone.
// Listener order on each event matches registration order below:
// default suppression first, then highlight state, then the drop handler.

// Stops the browser's default handling of the drag event and its propagation.
function preventDefaults(e) {
  e.preventDefault();
  e.stopPropagation();
}

// Marks the dropzone as an active drop target.
function highlight() {
  dropzone.classList.add('active');
}

// Clears the active-drop-target marker.
function unhighlight() {
  dropzone.classList.remove('active');
}

const listenOnDropzone = (eventNames, listener) => {
  for (const eventName of eventNames) {
    dropzone.addEventListener(eventName, listener, false);
  }
};

listenOnDropzone(['dragenter', 'dragover', 'dragleave', 'drop'], preventDefaults);
listenOnDropzone(['dragenter', 'dragover'], highlight);
listenOnDropzone(['dragleave', 'drop'], unhighlight);
dropzone.addEventListener('drop', handleDrop, false);
// The visible button forwards clicks to the hidden file input.
browseBtn.addEventListener('click', function(e) {
  e.preventDefault();
  fileInput.click();
});

fileInput.addEventListener('change', function(e) {
  handleFiles(e);
  // handleFiles has already read the File synchronously. Clearing the input lets
  // the user pick the same file again; otherwise `change` would not fire for it.
  fileInput.value = '';
});
// Drop handler: forwards the first dropped file to handleFiles, wrapped in the
// same { target: { files } } shape a file-input change event provides.
function handleDrop(e) {
  const { files } = e.dataTransfer;
  const droppedFile = files[0];
  if (!droppedFile) return;
  handleFiles({ target: { files: [droppedFile] } });
}
// Copy to clipboard
// The idle label is captured once. Reading it inside the click handler would pick
// up the temporary "COPIED!" label on a quick second click and restore that forever.
const copyBtnIdleHtml = copyBtn.innerHTML;
let copyBtnResetTimer;
copyBtn.addEventListener('click', async () => {
  try {
    // navigator.clipboard is undefined outside secure contexts; that TypeError is caught below too
    await navigator.clipboard.writeText(currentMarkdown);
    copyBtn.innerHTML = '<i class="fas fa-check mr-1"></i> COPIED!';
  } catch (err) {
    console.error('Copy to clipboard failed:', err);
    copyBtn.innerHTML = '<i class="fas fa-times mr-1"></i> COPY FAILED';
  }
  // Restart the 2 s reset window on every click
  clearTimeout(copyBtnResetTimer);
  copyBtnResetTimer = setTimeout(() => {
    copyBtn.innerHTML = copyBtnIdleHtml;
  }, 2000);
});
// Download: save the current Markdown as "<original name>.md" through a temporary link.
downloadBtn.addEventListener('click', () => {
  const objectUrl = URL.createObjectURL(
    new Blob([currentMarkdown], { type: 'text/markdown' })
  );
  const tempLink = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: `${currentFilename}.md`
  });
  document.body.appendChild(tempLink);
  tempLink.click();
  document.body.removeChild(tempLink);
  URL.revokeObjectURL(objectUrl);
});
/**
 * Entry point for a newly chosen file (file input change or drop).
 * Validates the extension, resets conversion state, shows the file summary
 * and starts the conversion.
 *
 * @param {{target: {files: ArrayLike<File>}}} e - change event, or an object of the same shape.
 */
function handleFiles(e) {
  const file = e.target.files[0];
  if (!file) return;

  // Supported extensions and how each is presented in the file summary.
  const SUPPORTED_TYPES = {
    pdf: { icon: 'fas fa-file-pdf', label: 'PDF Document' },
    epub: { icon: 'fas fa-book-open', label: 'EPUB eBook' },
    docx: { icon: 'fas fa-file-word', label: 'Word Document' },
    pptx: { icon: 'fas fa-file-powerpoint', label: 'PowerPoint Presentation' }
  };

  // Compare the extension case-insensitively so names like "REPORT.PDF" are accepted.
  const dotIndex = file.name.lastIndexOf('.');
  const extension = dotIndex >= 0 ? file.name.slice(dotIndex + 1).toLowerCase() : '';
  const typeInfo = Object.prototype.hasOwnProperty.call(SUPPORTED_TYPES, extension)
    ? SUPPORTED_TYPES[extension]
    : null;

  // Validate before touching any state, so rejecting a file leaves the previous result intact.
  if (!typeInfo) {
    alert('Unsupported file type. Please upload a PDF, EPUB, DOCX or PPTX file.');
    return;
  }

  // Reset state
  currentFilename = file.name.replace(/\.[^/.]+$/, "") || 'converted';
  currentMarkdown = '';
  markdownOutput.textContent = '';
  previewContent.innerHTML = '';
  pdfPages = [];
  epubItems = [];
  docxPages = [];
  pptxSlides = [];

  // Record the detected type and show the file summary
  currentFileType = extension;
  fileIcon.className = `file-icon ${typeInfo.icon}`;
  fileType.textContent = typeInfo.label;
  fileName.textContent = file.name;
  fileInfoContainer.classList.remove('hidden');

  // processFile reports its own failures in the UI, so the returned promise is not awaited
  processFile(file);
}
/**
 * Runs the converter matching `currentFileType` on `file` and updates the
 * progress UI. Failures are logged and shown in the output pane, not rethrown.
 *
 * @param {File} file - the document to convert.
 * @returns {Promise<void>}
 */
async function processFile(file) {
  // Hide the actions until this conversion succeeds. Otherwise buttons left over
  // from an earlier successful run would copy or download an empty result after a failure.
  copyBtn.classList.add('hidden');
  downloadBtn.classList.add('hidden');
  try {
    // Show progress
    progressContainer.classList.remove('hidden');
    progressBar.style.width = '0%';
    progressPercent.textContent = '0%';
    progressText.textContent = 'PROCESSING DOCUMENT...';
    // Clear previous content
    markdownOutput.textContent = 'LOADING...';
    previewContent.innerHTML = '';

    // Dispatch to the appropriate converter
    const converters = {
      pdf: convertPdfToMarkdown,
      epub: convertEpubToMarkdown,
      docx: convertDocxToMarkdown,
      pptx: convertPptxToMarkdown
    };
    const convert = Object.prototype.hasOwnProperty.call(converters, currentFileType)
      ? converters[currentFileType]
      : null;
    if (!convert) {
      // Previously an unknown type fell through and reported success with no output.
      throw new Error(`Unsupported file type: ${currentFileType}`);
    }
    await convert(file);

    // Show action buttons
    copyBtn.classList.remove('hidden');
    downloadBtn.classList.remove('hidden');
    // Update progress
    progressBar.style.width = '100%';
    progressPercent.textContent = '100%';
    progressText.textContent = 'CONVERSION COMPLETE!';
  } catch (error) {
    console.error(`Error converting ${currentFileType}:`, error);
    markdownOutput.textContent = `ERROR: ${error.message}`;
    progressText.textContent = 'CONVERSION FAILED';
  }
}
/**
 * Converts a PDF to Markdown with PDF.js: extracts each page's text under a
 * "# PAGE n" heading, renders a canvas preview per page, and stores the result
 * in `currentMarkdown` and the output pane.
 *
 * @param {File} file - the PDF to convert.
 * @returns {Promise<void>}
 * @throws if PDF.js cannot load or parse the document.
 */
async function convertPdfToMarkdown(file) {
  const arrayBuffer = await file.arrayBuffer();
  const loadingTask = pdfjsLib.getDocument(arrayBuffer);
  try {
    const pdf = await loadingTask.promise;
    let markdownContent = '';
    const totalPages = pdf.numPages;
    // Process each page
    for (let i = 1; i <= totalPages; i++) {
      const page = await pdf.getPage(i);
      // Update progress
      const progress = Math.round((i / totalPages) * 100);
      progressBar.style.width = `${progress}%`;
      progressPercent.textContent = `${progress}%`;
      progressText.textContent = `PROCESSING PAGE ${i} OF ${totalPages}...`;
      // Get text content. Items flagged `hasEOL` end a line; honouring the flag keeps
      // the page's line structure instead of flattening the page into one long line.
      // When the flag is absent this reduces to the plain space-joined text.
      const textContent = await page.getTextContent();
      const pageText = textContent.items
        .map(item => (item.str ?? '') + (item.hasEOL ? '\n' : ' '))
        .join('')
        .replace(/[ \t]+\n/g, '\n')
        .trim();
      // Add page separator if not first page
      if (i > 1) {
        markdownContent += `\n\n---\n\n`;
      }
      // Add page number
      markdownContent += `# PAGE ${i}\n\n`;
      // Add the text content
      markdownContent += pageText;
      // Render PDF preview
      const viewport = page.getViewport({ scale: 0.8 });
      const canvas = document.createElement('canvas');
      const context = canvas.getContext('2d');
      canvas.height = viewport.height;
      canvas.width = viewport.width;
      canvas.className = 'page-canvas';
      await page.render({
        canvasContext: context,
        viewport: viewport
      }).promise;
      pdfPages.push(canvas);
      // The canvas holds the rendered pixels; the page's own resources can go
      page.cleanup();
    }
    // Display preview
    pdfPages.forEach(page => {
      previewContent.appendChild(page);
    });
    // Post-process the markdown
    currentMarkdown = postProcessMarkdown(markdownContent);
    markdownOutput.textContent = currentMarkdown;
  } finally {
    // Release the document and its worker whether conversion succeeded or not;
    // previously every converted PDF stayed alive for the lifetime of the page.
    try {
      await loadingTask.destroy();
    } catch (cleanupError) {
      console.warn('Failed to release PDF resources:', cleanupError);
    }
  }
}
/**
 * Converts an EPUB to Markdown: unzips it with JSZip, reads the package document
 * named by META-INF/container.xml, optionally emits metadata, then walks the spine
 * in reading order and turns headings, paragraphs and lists into Markdown. A preview
 * card is built per spine item. The result goes to `currentMarkdown` and the output pane.
 *
 * @param {File} file - the EPUB to convert.
 * @returns {Promise<void>}
 * @throws {Error} if container.xml or the package document is missing or unreadable.
 */
async function convertEpubToMarkdown(file) {
  // Text of a node with runs of whitespace collapsed, so that source line breaks
  // inside an element cannot split a Markdown heading or list item.
  const collapseText = (node) => node.textContent.replace(/\s+/g, ' ').trim();

  // Resolves a manifest href against the package document's directory: drops any
  // fragment, decodes percent-escapes and normalises "." and ".." segments.
  const resolveZipPath = (baseDir, href) => {
    let relative = href.split('#')[0];
    try {
      relative = decodeURIComponent(relative);
    } catch (decodeError) {
      // Malformed escape sequence: fall back to the href exactly as written
    }
    const segments = [];
    const joined = baseDir ? `${baseDir}/${relative}` : relative;
    for (const segment of joined.split('/')) {
      if (segment === '' || segment === '.') continue;
      if (segment === '..') {
        segments.pop();
      } else {
        segments.push(segment);
      }
    }
    return segments.join('/');
  };

  // Renders one list; nested lists are rendered under their owning item, indented.
  const renderList = (list, depth) => {
    const isOrdered = list.tagName === 'OL';
    let output = '';
    let position = 0;
    for (const li of Array.from(list.children)) {
      if (li.tagName !== 'LI') continue;
      position++;
      const nestedLists = Array.from(li.querySelectorAll('ul, ol'))
        .filter(nested => nested.parentElement.closest('li') === li);
      // The item's own text excludes its nested lists, which are rendered separately
      const ownContent = li.cloneNode(true);
      ownContent.querySelectorAll('ul, ol').forEach(nested => nested.remove());
      const prefix = isOrdered ? `${position}.` : '-';
      output += `${'    '.repeat(depth)}${prefix} ${collapseText(ownContent)}\n`;
      nestedLists.forEach(nested => {
        output += renderList(nested, depth + 1);
      });
    }
    return output;
  };

  const arrayBuffer = await file.arrayBuffer();
  const zip = await JSZip.loadAsync(arrayBuffer);
  let markdownContent = '';

  // Get container.xml to find the rootfile
  const containerFile = zip.file('META-INF/container.xml');
  if (!containerFile) {
    throw new Error('Invalid EPUB: META-INF/container.xml is missing');
  }
  const containerData = await containerFile.async('text');
  const rootFileMatch = containerData.match(/<rootfile[^>]*full-path="([^"]*)"/);
  if (!rootFileMatch) {
    throw new Error('Invalid EPUB: container.xml does not declare a rootfile');
  }
  const rootFilePath = rootFileMatch[1];

  // Parse the rootfile (usually content.opf)
  const rootFile = zip.file(rootFilePath);
  if (!rootFile) {
    throw new Error(`Invalid EPUB: package document "${rootFilePath}" is missing`);
  }
  const rootFileData = await rootFile.async('text');
  const parser = new DOMParser();
  const opfDoc = parser.parseFromString(rootFileData, 'application/xml');

  // Extract metadata if enabled
  if (document.getElementById('includeMetadata').checked) {
    const metadata = opfDoc.querySelector('metadata');
    if (metadata) {
      markdownContent += '# EPUB METADATA\n\n';
      const title = metadata.querySelector('title')?.textContent;
      if (title) markdownContent += `**TITLE:** ${title}\n\n`;
      const creator = metadata.querySelector('creator')?.textContent;
      if (creator) markdownContent += `**AUTHOR:** ${creator}\n\n`;
      const date = metadata.querySelector('date')?.textContent;
      if (date) markdownContent += `**DATE:** ${date}\n\n`;
      const publisher = metadata.querySelector('publisher')?.textContent;
      if (publisher) markdownContent += `**PUBLISHER:** ${publisher}\n\n`;
      markdownContent += '---\n\n';
    }
  }

  // Get the manifest (id -> href for every file in the package)
  const manifest = {};
  opfDoc.querySelectorAll('manifest item').forEach(item => {
    manifest[item.getAttribute('id')] = item.getAttribute('href');
  });

  // Get the spine (reading order)
  const spineItems = opfDoc.querySelectorAll('spine itemref');
  const totalItems = spineItems.length;
  const contentDir = rootFilePath.split('/').slice(0, -1).join('/');
  let processedItems = 0;
  let renderedItems = 0;

  // Process each spine item
  for (const item of spineItems) {
    // Count every spine item, including skipped ones, so progress can reach 100%
    processedItems++;
    const progress = Math.round((processedItems / totalItems) * 100);
    progressBar.style.width = `${progress}%`;
    progressPercent.textContent = `${progress}%`;
    progressText.textContent = `PROCESSING ITEM ${processedItems} OF ${totalItems}...`;

    const href = manifest[item.getAttribute('idref')];
    if (!href) continue;
    const contentFile = zip.file(resolveZipPath(contentDir, href));
    if (!contentFile) continue;
    const content = await contentFile.async('text');

    // Parse HTML content
    const contentDoc = parser.parseFromString(content, 'text/html');
    // Remove scripts and styles
    contentDoc.querySelectorAll('script, style').forEach(el => el.remove());

    // Walk headings, paragraphs and lists in ONE pass so the output keeps document
    // order. Separate passes per element type would group all headings first, then
    // all paragraphs, then all lists.
    let itemContent = '';
    contentDoc.querySelectorAll('h1, h2, h3, h4, h5, h6, p, ul, ol').forEach(el => {
      const tag = el.tagName;
      if (tag === 'UL' || tag === 'OL') {
        // Nested lists are emitted by renderList under their parent item
        if (el.parentElement && el.parentElement.closest('li')) return;
        itemContent += `${renderList(el, 0)}\n`;
        return;
      }
      // Headings and paragraphs inside a list item are already part of that item's text
      if (el.closest('li')) return;
      const text = collapseText(el);
      if (!text) return;
      if (tag === 'P') {
        itemContent += `${text}\n\n`;
      } else {
        const level = parseInt(tag.substring(1), 10);
        itemContent += `${'#'.repeat(level)} ${text}\n\n`;
      }
    });

    // Separate consecutive rendered items. Prepending avoids a dangling separator
    // when trailing spine items are skipped.
    if (renderedItems > 0) {
      markdownContent += '\n---\n\n';
    }
    markdownContent += itemContent;
    renderedItems++;

    // Create preview element
    const previewDiv = document.createElement('div');
    previewDiv.className = 'preview-slide';
    const previewTitle = document.createElement('h4');
    previewTitle.className = 'font-bold mb-2';
    previewTitle.textContent = href;
    const previewContentDiv = document.createElement('div');
    previewContentDiv.className = 'text-sm';
    previewContentDiv.textContent = itemContent.substring(0, 500) + (itemContent.length > 500 ? '...' : '');
    previewDiv.appendChild(previewTitle);
    previewDiv.appendChild(previewContentDiv);
    epubItems.push(previewDiv);
  }

  // Display preview
  epubItems.forEach(item => {
    previewContent.appendChild(item);
  });

  // Post-process the markdown
  currentMarkdown = postProcessMarkdown(markdownContent);
  markdownOutput.textContent = currentMarkdown;
}
/**
 * Extracts the raw text of a DOCX with mammoth.js, builds preview cards for
 * simulated "pages" (fixed-size character slices), and stores the post-processed
 * text in `currentMarkdown` and the output pane.
 *
 * @param {File} file - the DOCX to convert.
 * @returns {Promise<void>}
 */
async function convertDocxToMarkdown(file) {
  const CHARS_PER_PAGE = 2000; // approximate page size; DOCX has no real pages
  const PREVIEW_CHARS = 500;

  const buffer = await file.arrayBuffer();
  const extraction = await mammoth.extractRawText({ arrayBuffer: buffer });
  const rawText = extraction.value;
  const totalPages = Math.ceil(rawText.length / CHARS_PER_PAGE);

  // Builds the preview card for the zero-based page number.
  const buildPageCard = (pageNumber) => {
    const offset = pageNumber * CHARS_PER_PAGE;
    const slice = rawText.substring(offset, offset + CHARS_PER_PAGE);

    const heading = document.createElement('h4');
    heading.className = 'font-bold mb-2';
    heading.textContent = `Page ${pageNumber + 1}`;

    const excerpt = document.createElement('div');
    excerpt.className = 'text-sm';
    excerpt.textContent = slice.substring(0, PREVIEW_CHARS) + (slice.length > PREVIEW_CHARS ? '...' : '');

    const card = document.createElement('div');
    card.className = 'preview-slide';
    card.appendChild(heading);
    card.appendChild(excerpt);
    return card;
  };

  for (let pageNumber = 0; pageNumber < totalPages; pageNumber += 1) {
    docxPages.push(buildPageCard(pageNumber));
    // Progress reflects completed pages; processFile sets the final 100%
    const percent = Math.round((pageNumber / totalPages) * 100);
    progressBar.style.width = `${percent}%`;
    progressPercent.textContent = `${percent}%`;
    progressText.textContent = `PROCESSING DOCUMENT...`;
  }

  // Show the cards, then publish the post-processed text
  for (const card of docxPages) {
    previewContent.appendChild(card);
  }
  currentMarkdown = postProcessMarkdown(rawText);
  markdownOutput.textContent = currentMarkdown;
}
/**
 * Converts a PPTX to Markdown through the global `pptx2md` function, builds a
 * preview card per slide, and stores the result in `currentMarkdown` and the
 * output pane.
 *
 * NOTE(review): the result shape used here, { markdown, slides: [{ title, notes,
 * bodies: [{ text }] }] }, is inferred from how this function reads it. Confirm
 * it against the pptx2md build actually loaded by the page.
 *
 * @param {File} file - the PPTX to convert.
 * @returns {Promise<void>}
 * @throws {Error} if the pptx2md library is not loaded.
 */
async function convertPptxToMarkdown(file) {
  // Fail with a readable message rather than "pptx2md is not defined"
  if (typeof pptx2md !== 'function') {
    throw new Error('PPTX converter library is not available');
  }
  const arrayBuffer = await file.arrayBuffer();
  // Convert PPTX to Markdown using pptx2md
  const result = await pptx2md(arrayBuffer);
  // Tolerate missing fields so a partial result does not crash the conversion
  const markdownContent = typeof result?.markdown === 'string' ? result.markdown : '';
  const slides = Array.isArray(result?.slides) ? result.slides : [];

  // Create preview elements for each slide
  slides.forEach((slide, index) => {
    const previewDiv = document.createElement('div');
    previewDiv.className = 'preview-slide';
    const previewTitle = document.createElement('h4');
    previewTitle.className = 'font-bold mb-2';
    previewTitle.textContent = `Slide ${index + 1}`;
    const previewContentDiv = document.createElement('div');
    previewContentDiv.className = 'text-sm';
    // Create a simplified preview of the slide content
    let previewText = '';
    if (slide.title) previewText += `# ${slide.title}\n\n`;
    if (slide.notes) previewText += `${slide.notes}\n\n`;
    if (slide.bodies && slide.bodies.length > 0) {
      previewText += slide.bodies.map(body => body?.text ?? '').join('\n\n');
    }
    previewContentDiv.textContent = previewText.substring(0, 500) + (previewText.length > 500 ? '...' : '');
    previewDiv.appendChild(previewTitle);
    previewDiv.appendChild(previewContentDiv);
    pptxSlides.push(previewDiv);
    // Update progress
    const progress = Math.round((index / slides.length) * 100);
    progressBar.style.width = `${progress}%`;
    progressPercent.textContent = `${progress}%`;
    progressText.textContent = `PROCESSING SLIDE ${index + 1} OF ${slides.length}...`;
  });

  // Display preview
  pptxSlides.forEach(slide => {
    previewContent.appendChild(slide);
  });

  // Post-process the markdown
  currentMarkdown = postProcessMarkdown(markdownContent);
  markdownOutput.textContent = currentMarkdown;
}
/**
 * Applies the optional clean-up passes to converted text.
 *
 * @param {string} text - raw converted text; null/undefined is treated as ''.
 * @param {{detectHeadings?: boolean, preserveLayout?: boolean}} [options]
 *   Overrides for the two passes. Any flag left out is read from the matching
 *   checkbox on the page (#detectHeadings / #preserveLayout).
 * @returns {string} the processed text.
 */
function postProcessMarkdown(text, options = {}) {
  const detectHeadings = options.detectHeadings
    ?? document.getElementById('detectHeadings').checked;
  const preserveLayout = options.preserveLayout
    ?? document.getElementById('preserveLayout').checked;

  let result = String(text ?? '');

  if (detectHeadings) {
    // Heuristic: a line that follows a blank line, starts with a capital letter,
    // is 11-80 characters long and does not end in sentence punctuation is
    // promoted to a level-2 heading.
    // - Only the line itself is captured; capturing the leading "\n\n" as well
    //   produced an empty "## " line followed by the untouched text.
    // - The trailing newline is a lookahead, so it is not consumed and the next
    //   candidate directly after a blank line can still match.
    // - The length cap and punctuation check stop ordinary paragraphs from
    //   being turned into headings.
    result = result.replace(/\n\n([A-Z][^\n]{9,78}[^\n.,;:!?])(?=\n)/g, '\n\n## $1');
  }

  if (preserveLayout) {
    // Collapse runs of three or more newlines into a single paragraph break
    result = result.replace(/\n{3,}/g, '\n\n');
  }

  return result;
}
}); | |
</script> | |
<!-- Service worker source, kept inert.
     navigator.serviceWorker.register() only accepts http(s) script URLs, so registering
     this code from a blob: URL was always rejected, and it duplicated the /sw.js
     registration performed by the application script above. Publish the text below as
     /sw.js to enable offline caching; the "text/plain" type keeps the page from running it. -->
<script type="text/plain" id="serviceWorkerSource">
const CACHE_NAME = 'doc-converter-v3';
const ASSETS_TO_CACHE = [
  '/',
  '/index.html',
  'https://cdn.tailwindcss.com',
  'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.min.js',
  'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.worker.min.js',
  'https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js',
  // Version restored: it had been replaced by an e-mail obfuscation placeholder
  'https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js',
  'https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.0/mammoth.browser.min.js',
  // NOTE(review): version segment mangled by e-mail obfuscation; restore the intended release
  'https://cdn.jsdelivr.net/npm/[email protected]/dist/pptx2md.min.js',
  'https://kit.fontawesome.com/a076d05399.js',
  'https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&display=swap'
];

// Pre-cache each asset independently. cache.addAll() is all-or-nothing, so a single
// unreachable URL would fail the whole install.
self.addEventListener('install', (event) => {
  event.waitUntil(
    caches.open(CACHE_NAME).then((cache) =>
      Promise.all(
        ASSETS_TO_CACHE.map((url) =>
          cache.add(url).catch((error) => {
            console.warn('Skipping pre-cache of', url, error);
          })
        )
      )
    )
  );
});

// Cache first, network as fallback. Only GET requests are served from the cache.
self.addEventListener('fetch', (event) => {
  if (event.request.method !== 'GET') return;
  event.respondWith(
    caches.match(event.request).then((response) => response || fetch(event.request))
  );
});

// Delete caches left behind by earlier versions.
self.addEventListener('activate', (event) => {
  const cacheWhitelist = [CACHE_NAME];
  event.waitUntil(
    caches.keys().then((cacheNames) =>
      Promise.all(
        cacheNames.map((cacheName) => {
          if (cacheWhitelist.indexOf(cacheName) === -1) {
            return caches.delete(cacheName);
          }
        })
      )
    )
  );
});
</script>
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=ihansel/documentconversion" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">Remix</a></p></body>
</html> |