// ---------------------------------------------------------------------------
// Shared application state
// ---------------------------------------------------------------------------

/** PDF file currently selected by the user, or null when none is selected. */
let pdfFile = null;

/** Markdown text produced by the OCR step. */
let markdownContent = '';

/** Translated Markdown; stays empty when no translation was produced. */
let translationContent = '';

/** Images returned by OCR, stored as `{ id, data }` records. */
let imagesData = [];

// ---------------------------------------------------------------------------
// DOM references (looked up once when the script is evaluated)
// ---------------------------------------------------------------------------

// API key inputs, their show/hide buttons and "remember" checkboxes
const mistralApiKeyInput = document.getElementById('mistralApiKey');
const toggleMistralKeyBtn = document.getElementById('toggleMistralKey');
const rememberMistralKeyCheckbox = document.getElementById('rememberMistralKey');
const translationApiKeyInput = document.getElementById('translationApiKey');
const toggleTranslationKeyBtn = document.getElementById('toggleTranslationKey');
const rememberTranslationKeyCheckbox = document.getElementById('rememberTranslationKey');

// Translation model selection and the custom-model form
const translationModelSelect = document.getElementById('translationModel');
const customModelSettings = document.getElementById('customModelSettings');

// Advanced settings panel
const advancedSettingsToggle = document.getElementById('advancedSettingsToggle');
const advancedSettings = document.getElementById('advancedSettings');
const advancedSettingsIcon = document.getElementById('advancedSettingsIcon');
const maxTokensPerChunk = document.getElementById('maxTokensPerChunk');
const maxTokensPerChunkValue = document.getElementById('maxTokensPerChunkValue');

// File upload area
const dropZone = document.getElementById('dropZone');
const pdfFileInput = document.getElementById('pdfFileInput');
const browseFilesBtn = document.getElementById('browseFilesBtn');
const fileInfo = document.getElementById('fileInfo');
const fileName = document.getElementById('fileName');
const fileSize = document.getElementById('fileSize');
const removeFileBtn = document.getElementById('removeFileBtn');

// Translation target language
const targetLanguage = document.getElementById('targetLanguage');

// Action buttons
const processBtn = document.getElementById('processBtn');
const downloadMarkdownBtn = document.getElementById('downloadMarkdownBtn');
const downloadTranslationBtn = document.getElementById('downloadTranslationBtn');

// Result display
const resultsSection = document.getElementById('resultsSection');
const markdownPreview = document.getElementById('markdownPreview');
const translationPreview = document.getElementById('translationPreview');
const translationResultCard = document.getElementById('translationResultCard');

// Progress display
const progressSection = document.getElementById('progressSection');
const progressStep = document.getElementById('progressStep');
const progressPercentage = document.getElementById('progressPercentage');
const progressBar = document.getElementById('progressBar');
const progressLog = document.getElementById('progressLog');

/**
 * Page initialisation: restores remembered API keys and saved settings, then
 * wires every UI control (key inputs, file upload, processing, downloads,
 * model selection, advanced settings) to its handler.
 */
document.addEventListener('DOMContentLoaded', () => {
    /** Pre-fills an API key input from localStorage when a key was remembered. */
    const restoreRememberedKey = (storageKey, input, checkbox) => {
        const stored = localStorage.getItem(storageKey);
        if (stored) {
            input.value = stored;
            checkbox.checked = true;
        }
    };

    /** Keeps localStorage in sync with the input while "remember" is ticked. */
    const persistKeyWhileRemembered = (storageKey, input, checkbox) => {
        checkbox.addEventListener('change', () => {
            if (checkbox.checked) {
                localStorage.setItem(storageKey, input.value);
            } else {
                localStorage.removeItem(storageKey);
            }
        });

        input.addEventListener('input', () => {
            if (checkbox.checked) {
                localStorage.setItem(storageKey, input.value);
            }
        });
    };

    /** Lets a button switch an input between masked and plain-text display. */
    const bindVisibilityToggle = (input, button) => {
        button.addEventListener('click', () => {
            const reveal = input.type === 'password';
            input.type = reveal ? 'text' : 'password';
            const icon = reveal ? 'carbon:view-off' : 'carbon:view';
            button.innerHTML = `<iconify-icon icon="${icon}" width="20"></iconify-icon>`;
        });
    };

    // Restore persisted state before any listener is attached
    restoreRememberedKey('mistralApiKey', mistralApiKeyInput, rememberMistralKeyCheckbox);
    restoreRememberedKey('translationApiKey', translationApiKeyInput, rememberTranslationKeyCheckbox);
    loadSettings();

    // API key inputs
    bindVisibilityToggle(mistralApiKeyInput, toggleMistralKeyBtn);
    bindVisibilityToggle(translationApiKeyInput, toggleTranslationKeyBtn);
    persistKeyWhileRemembered('mistralApiKey', mistralApiKeyInput, rememberMistralKeyCheckbox);
    persistKeyWhileRemembered('translationApiKey', translationApiKeyInput, rememberTranslationKeyCheckbox);

    // The process button depends on the Mistral key, so typing a key after
    // choosing a file must re-evaluate its enabled state.
    mistralApiKeyInput.addEventListener('input', () => {
        updateProcessButtonState();
    });

    // Drag-and-drop upload
    dropZone.addEventListener('dragover', (e) => {
        e.preventDefault();
        dropZone.classList.add('border-blue-500', 'bg-blue-50');
    });

    dropZone.addEventListener('dragleave', () => {
        dropZone.classList.remove('border-blue-500', 'bg-blue-50');
    });

    dropZone.addEventListener('drop', (e) => {
        e.preventDefault();
        dropZone.classList.remove('border-blue-500', 'bg-blue-50');

        const [droppedFile] = e.dataTransfer.files;
        if (droppedFile && droppedFile.type === 'application/pdf') {
            handleFileSelection(droppedFile);
        } else {
            showNotification('请上传PDF文件', 'error');
        }
    });

    // File picker upload
    browseFilesBtn.addEventListener('click', () => {
        pdfFileInput.click();
    });

    pdfFileInput.addEventListener('change', (e) => {
        if (e.target.files.length > 0) {
            handleFileSelection(e.target.files[0]);
        }
    });

    removeFileBtn.addEventListener('click', () => {
        pdfFile = null;
        fileInfo.classList.add('hidden');
        pdfFileInput.value = '';
        updateProcessButtonState();
    });

    // Main pipeline: OCR, then optional translation
    processBtn.addEventListener('click', async () => {
        const mistralKey = mistralApiKeyInput.value.trim();
        if (!mistralKey) {
            showNotification('请输入Mistral API Key', 'error');
            return;
        }

        if (!pdfFile) {
            showNotification('请上传PDF文件', 'error');
            return;
        }

        // Validate the translation key up front so an OCR request is not
        // spent on a run that cannot finish.
        const translationModel = translationModelSelect.value;
        const wantsTranslation = translationModel !== 'none';
        const translationKey = wantsTranslation ? translationApiKeyInput.value.trim() : '';
        if (wantsTranslation && !translationKey) {
            showNotification('请输入翻译API Key', 'error');
            return;
        }

        processBtn.disabled = true;
        // Drop any translation left over from a previous run; otherwise it
        // would be displayed again when translation is disabled this time.
        translationContent = '';

        try {
            showProgressSection();
            updateProgress('开始处理...', 5);
            addProgressLog('开始OCR处理...');

            await processPdfWithMistral(mistralKey);

            if (wantsTranslation) {
                updateProgress('开始翻译...', 60);
                addProgressLog(`使用${translationModel}模型进行翻译...`);

                // Documents above this estimate are translated chunk by chunk
                const estimatedTokens = estimateTokenCount(markdownContent);
                const tokenLimit = 12000;
                const estimatedK = Math.round(estimatedTokens / 1000);

                if (estimatedTokens > tokenLimit) {
                    addProgressLog(`文档较大(~${estimatedK}K tokens),将进行分段翻译`);
                    translationContent = await translateLongDocument(markdownContent, targetLanguage.value, translationModel, translationKey);
                } else {
                    addProgressLog(`文档较小(~${estimatedK}K tokens),不分段直接翻译`);
                    translationContent = await translateMarkdown(markdownContent, targetLanguage.value, translationModel, translationKey);
                }
            }

            updateProgress('处理完成!', 100);
            addProgressLog('全部处理完成!');
            showResultsSection();
        } catch (error) {
            console.error('处理错误:', error);
            showNotification('处理过程中出错: ' + error.message, 'error');
            addProgressLog('错误: ' + error.message);
            updateProgress('处理失败', 100);
        } finally {
            processBtn.disabled = false;
        }
    });

    // Downloads
    downloadMarkdownBtn.addEventListener('click', () => {
        if (markdownContent) {
            downloadMarkdownWithImages();
        }
    });

    downloadTranslationBtn.addEventListener('click', () => {
        if (translationContent) {
            downloadTranslationWithImages();
        }
    });

    // Translation model selection
    translationModelSelect.addEventListener('change', function() {
        if (this.value === 'custom') {
            customModelSettings.classList.remove('hidden');
        } else {
            customModelSettings.classList.add('hidden');
        }

        updateTranslationUIVisibility();
        saveSettings();
    });

    // Advanced settings panel
    advancedSettingsToggle.addEventListener('click', () => {
        advancedSettings.classList.toggle('hidden');

        const collapsed = advancedSettings.classList.contains('hidden');
        advancedSettingsIcon.setAttribute('icon', collapsed ? 'carbon:chevron-down' : 'carbon:chevron-up');

        saveSettings();
    });

    maxTokensPerChunk.addEventListener('input', function() {
        maxTokensPerChunkValue.textContent = this.value;
        saveSettings();
    });

    // Custom model form: persist on every edit. Missing elements are skipped
    // so one absent field cannot abort the rest of the initialisation.
    const customModelInputs = [
        document.getElementById('customModelName'),
        document.getElementById('customApiEndpoint'),
        document.getElementById('customModelId'),
        document.getElementById('customRequestFormat')
    ].filter(Boolean);

    customModelInputs.forEach((input) => {
        input.addEventListener('change', () => {
            saveSettings();
        });

        input.addEventListener('input', () => {
            saveSettings();
        });
    });

    // Initial UI state
    updateProcessButtonState();
    updateTranslationUIVisibility();
});

/**
 * Records the chosen PDF and shows its name and size in the file info panel.
 *
 * @param {File} file - The file picked or dropped by the user. A missing
 *   value is ignored so a cancelled selection leaves the current file intact.
 */
function handleFileSelection(file) {
    if (!file) {
        return;
    }

    pdfFile = file;
    document.getElementById('fileName').textContent = file.name;
    document.getElementById('fileSize').textContent = formatFileSize(file.size);
    document.getElementById('fileInfo').classList.remove('hidden');
    updateProcessButtonState();
}

/**
 * Enables the process button only when both a PDF is selected and a
 * non-blank Mistral API key has been entered.
 */
function updateProcessButtonState() {
    const mistralKey = document.getElementById('mistralApiKey').value.trim();
    const button = document.getElementById('processBtn');
    button.disabled = !pdfFile || !mistralKey;
}

/**
 * Shows the translation API key section when a translation model is
 * selected and hides it when the selection is 'none'.
 *
 * The section is located as the parent of the nearest `div` around the
 * `#translationApiKey` input; nothing happens if that chain is missing.
 */
function updateTranslationUIVisibility() {
    const keyInput = document.querySelector('#translationApiKey');
    const section = keyInput?.closest('div')?.parentNode;
    if (!section) {
        return;
    }

    const translationEnabled = translationModelSelect.value !== 'none';
    section.style.display = translationEnabled ? 'block' : 'none';
}

/**
 * Swaps the progress panel for the results panel, fills the Markdown and
 * (when available) translation previews, and scrolls the results into view.
 */
function showResultsSection() {
    document.getElementById('progressSection').classList.add('hidden');

    const results = document.getElementById('resultsSection');
    results.classList.remove('hidden');

    document.getElementById('markdownPreview').textContent = truncateForPreview(markdownContent);

    const translationCard = document.getElementById('translationResultCard');
    if (translationContent) {
        document.getElementById('translationPreview').textContent = truncateForPreview(translationContent);
        translationCard.classList.remove('hidden');
    } else {
        translationCard.classList.add('hidden');
    }

    window.scrollTo({
        top: results.offsetTop - 20,
        behavior: 'smooth'
    });
}

/**
 * Returns at most `maxLength` characters of `text`, adding an ellipsis only
 * when something was actually cut off.
 *
 * @param {string} text - Full text to preview.
 * @param {number} [maxLength=500] - Maximum number of characters to keep.
 * @returns {string} The preview text.
 */
function truncateForPreview(text, maxLength = 500) {
    if (text.length <= maxLength) {
        return text;
    }
    return text.substring(0, maxLength) + '...';
}

/**
 * Hides the results panel, shows an emptied progress panel reset to 0 %,
 * and scrolls it into view.
 */
function showProgressSection() {
    document.getElementById('resultsSection').classList.add('hidden');

    const progress = document.getElementById('progressSection');
    progress.classList.remove('hidden');

    // Start each run with an empty log
    document.getElementById('progressLog').innerHTML = '';
    updateProgress('初始化...', 0);

    window.scrollTo({
        top: progress.offsetTop - 20,
        behavior: 'smooth'
    });
}

/**
 * Updates the progress panel: step label, percentage text and bar width.
 *
 * @param {string} stepText - Description of the current step.
 * @param {number} percentage - Completion value, rendered as `<value>%`.
 */
function updateProgress(stepText, percentage) {
    const percentLabel = `${percentage}%`;
    document.getElementById('progressStep').textContent = stepText;
    document.getElementById('progressPercentage').textContent = percentLabel;
    document.getElementById('progressBar').style.width = percentLabel;
}

/**
 * Appends a timestamped line to the progress log and scrolls to the bottom.
 *
 * The entry is added as a text node rather than through `innerHTML`: log
 * text frequently contains error bodies returned by remote APIs, which must
 * not be interpreted as markup. Appending a node also avoids re-parsing the
 * whole log for every new line.
 *
 * @param {string} text - Message to log.
 */
function addProgressLog(text) {
    const logElement = document.getElementById('progressLog');
    const timestamp = new Date().toLocaleTimeString();

    const entry = document.createElement('div');
    entry.textContent = `[${timestamp}] ${text}`;
    logElement.appendChild(entry);

    logElement.scrollTop = logElement.scrollHeight;
}

/**
 * Shows a toast notification that slides in, can be dismissed with its close
 * button, and closes itself after five seconds.
 *
 * The toast is assembled from DOM nodes and the message is assigned through
 * `textContent`, because messages often embed error text returned by remote
 * APIs and must never be parsed as HTML.
 *
 * @param {string} message - Text to display.
 * @param {'info'|'success'|'error'|'warning'} [type='info'] - Visual style;
 *   unknown values fall back to the 'info' style.
 * @returns {HTMLElement} The notification element that was added to the page.
 */
function showNotification(message, type = 'info') {
    const stylesByType = {
        success: { background: 'bg-green-50 border-green-500', text: 'text-green-800', icon: 'carbon:checkmark-filled' },
        error: { background: 'bg-red-50 border-red-500', text: 'text-red-800', icon: 'carbon:error-filled' },
        warning: { background: 'bg-yellow-50 border-yellow-500', text: 'text-yellow-800', icon: 'carbon:warning-filled' },
        info: { background: 'bg-blue-50 border-blue-500', text: 'text-blue-800', icon: 'carbon:information-filled' }
    };
    const isKnownType = Object.prototype.hasOwnProperty.call(stylesByType, type);
    const style = isKnownType ? stylesByType[type] : stylesByType.info;

    const notification = document.createElement('div');
    notification.className = `flex items-center p-4 mb-4 max-w-md border-l-4 ${style.background} ${style.text} shadow-md rounded-r-lg transform transition-all duration-300 ease-in-out`;
    // Start off-screen and transparent so the toast can animate in
    notification.style.opacity = '0';
    notification.style.transform = 'translateX(100%)';

    const typeIcon = document.createElement('iconify-icon');
    typeIcon.setAttribute('icon', style.icon);
    typeIcon.className = 'flex-shrink-0 w-5 h-5 mr-2';

    const messageBox = document.createElement('div');
    messageBox.className = 'ml-3 text-sm font-medium flex-grow';
    messageBox.textContent = message;

    const closeButton = document.createElement('button');
    closeButton.type = 'button';
    closeButton.className = 'ml-auto -mx-1.5 -my-1.5 rounded-lg p-1.5 inline-flex h-8 w-8 hover:bg-gray-200 focus:ring-2 focus:ring-gray-400';

    const closeIcon = document.createElement('iconify-icon');
    closeIcon.setAttribute('icon', 'carbon:close');
    closeIcon.className = 'w-5 h-5';
    closeButton.appendChild(closeIcon);

    notification.appendChild(typeIcon);
    notification.appendChild(messageBox);
    notification.appendChild(closeButton);

    // Fall back to <body> so a missing container cannot hide an error
    const container = document.getElementById('notification-container') || document.body;
    container.appendChild(notification);

    // Let the initial styles apply first so the transition actually runs
    setTimeout(() => {
        notification.style.opacity = '1';
        notification.style.transform = 'translateX(0)';
    }, 10);

    closeButton.addEventListener('click', () => {
        closeNotification(notification);
    });

    const timeout = setTimeout(() => {
        closeNotification(notification);
    }, 5000);

    // Stored so closeNotification can cancel the auto-close timer
    notification.dataset.timeout = timeout;

    return notification;
}

/**
 * Dismisses a notification: cancels its auto-close timer, plays the
 * slide-out transition, then removes the element from the page.
 *
 * @param {HTMLElement} notification - Element created by showNotification.
 */
function closeNotification(notification) {
    if (!notification) {
        return;
    }

    clearTimeout(notification.dataset.timeout);

    notification.style.opacity = '0';
    notification.style.transform = 'translateX(100%)';

    // Remove only after the 300 ms CSS transition has had time to finish
    setTimeout(() => {
        const parent = notification.parentNode;
        if (parent) {
            parent.removeChild(notification);
        }
    }, 300);
}

/**
 * Formats a byte count for display using binary units.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} `"<n> B"` below 1 KiB, otherwise the size with two
 *   decimals in KB (below 1 MiB) or MB.
 */
function formatFileSize(bytes) {
    const KIB = 1024;
    const MIB = 1048576;

    if (bytes < KIB) {
        return bytes + ' B';
    }
    if (bytes < MIB) {
        return (bytes / KIB).toFixed(2) + ' KB';
    }
    return (bytes / MIB).toFixed(2) + ' MB';
}

/**
 * Runs the selected PDF through Mistral OCR in three API steps: upload the
 * file, request a signed URL for it, then submit that URL to the OCR
 * endpoint. The OCR response is handed to `processOcrResults`.
 *
 * @param {string} apiKey - Mistral API key.
 * @returns {Promise<boolean>} Resolves to `true` when every step succeeded.
 * @throws {Error} With a user-facing message when the key looks malformed,
 *   a request fails, or a response has an unexpected shape. The failure is
 *   also written to the progress log before being rethrown.
 */
async function processPdfWithMistral(apiKey) {
    try {
        addProgressLog('准备PDF文件...');
        updateProgress('PDF处理准备中', 10);

        // Cheap sanity check before any network traffic
        if (typeof apiKey !== 'string' || apiKey.length < 30) {
            throw new Error('Mistral API密钥格式可能不正确,请检查');
        }

        // Step 1: upload the PDF
        const formData = new FormData();
        formData.append('file', pdfFile);
        formData.append('purpose', 'ocr');

        addProgressLog('准备上传PDF文件...');
        updateProgress('上传文件中...', 20);
        addProgressLog('开始上传到Mistral...');
        console.log('开始上传文件,文件名:', pdfFile.name, '文件大小:', pdfFile.size);

        const fileData = await callMistralApi('https://api.mistral.ai/v1/files', {
            method: 'POST',
            // No Content-Type: fetch sets the multipart boundary for FormData bodies
            headers: {
                'Authorization': `Bearer ${apiKey}`
            },
            body: formData
        }, {
            networkConsole: '上传错误详情:',
            networkLog: '网络错误',
            networkThrow: '文件上传失败,网络错误',
            httpConsole: '上传失败原始响应:',
            httpLog: '上传失败',
            httpThrow: '文件上传失败',
            unauthorized: 'API密钥无效或未授权,请检查您的Mistral API密钥',
            parseConsole: '解析文件数据错误:',
            parseThrow: '无法解析文件上传响应数据'
        });
        console.log('文件上传响应:', JSON.stringify(fileData));

        if (!fileData || !fileData.id) {
            console.error('文件数据无效:', fileData);
            throw new Error('上传成功但未返回有效的文件ID');
        }

        const fileId = fileData.id;
        if (typeof fileId !== 'string' || fileId.trim() === '') {
            throw new Error('文件ID无效,无法继续处理');
        }

        addProgressLog(`文件上传成功,ID: ${fileId}`);
        updateProgress('获取文件访问权限...', 30);

        // Short pause before asking for the signed URL
        // NOTE(review): presumably gives the upload time to become available — confirm it is still needed
        await new Promise(resolve => setTimeout(resolve, 1000));

        // Step 2: obtain a signed URL for the uploaded file
        const urlEndpoint = `https://api.mistral.ai/v1/files/${encodeURIComponent(fileId)}/url?expiry=24`;
        console.log('请求签名URL:', urlEndpoint);

        const urlData = await callMistralApi(urlEndpoint, {
            method: 'GET',
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'Accept': 'application/json'
            }
        }, {
            networkConsole: '获取URL错误详情:',
            networkLog: '获取URL错误',
            networkThrow: '获取签名URL失败,网络错误',
            httpConsole: '获取URL失败原始响应:',
            httpLog: '获取签名URL失败',
            httpThrow: '获取签名URL失败',
            parseConsole: '解析URL数据错误:',
            parseThrow: '无法解析签名URL响应数据'
        });
        // The response is deliberately not logged: the signed URL grants
        // access to the uploaded document.

        if (!urlData || !urlData.url) {
            console.error('URL数据无效:', urlData);
            addProgressLog('返回的URL格式不正确');
            throw new Error('获取的签名URL格式不正确');
        }

        const signedUrl = urlData.url;
        addProgressLog('成功获取文件访问URL');
        updateProgress('开始OCR处理...', 40);

        // Step 3: run OCR on the signed URL
        const ocrData = await callMistralApi('https://api.mistral.ai/v1/ocr', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
                'Accept': 'application/json'
            },
            body: JSON.stringify({
                model: 'mistral-ocr-latest',
                document: {
                    type: "document_url",
                    document_url: signedUrl
                },
                include_image_base64: true
            })
        }, {
            networkConsole: 'OCR错误详情:',
            networkLog: 'OCR处理网络错误',
            networkThrow: 'OCR处理失败,网络错误',
            httpConsole: 'OCR处理失败原始响应:',
            httpLog: 'OCR处理失败',
            httpThrow: 'OCR处理失败',
            parseConsole: '解析OCR数据错误:',
            parseThrow: '无法解析OCR处理响应数据'
        });
        console.log('OCR处理成功,返回数据类型:', typeof ocrData);

        if (!ocrData || !Array.isArray(ocrData.pages)) {
            console.error('OCR数据无效:', ocrData);
            throw new Error('OCR处理成功但返回的数据格式不正确');
        }

        addProgressLog('OCR处理完成,开始生成Markdown');
        updateProgress('生成Markdown...', 50);

        await processOcrResults(ocrData);
        addProgressLog('Markdown生成完成');

        return true;
    } catch (error) {
        console.error('Mistral OCR处理错误:', error);
        addProgressLog(`处理失败: ${error.message || '未知错误'}`);
        throw error;
    }
}

/**
 * Performs one Mistral API request and returns its parsed JSON body,
 * converting network failures, non-2xx statuses and unparsable bodies into
 * errors that carry the step-specific wording given in `messages`.
 *
 * @param {string} url - Request URL.
 * @param {object} init - Options passed to `fetch`.
 * @param {object} messages - Step-specific console/log/error texts. The
 *   optional `unauthorized` text replaces the generic HTTP error on a 401.
 * @returns {Promise<any>} Parsed JSON response body.
 */
async function callMistralApi(url, init, messages) {
    let response;
    try {
        response = await fetch(url, init);
    } catch (networkError) {
        const reason = networkError.message || '未知网络错误';
        console.error(messages.networkConsole, networkError);
        addProgressLog(`${messages.networkLog}: ${reason}`);
        throw new Error(`${messages.networkThrow}: ${reason}`);
    }

    if (!response.ok) {
        const errorInfo = await readMistralErrorInfo(response, messages.httpConsole);
        addProgressLog(`${messages.httpLog}: ${response.status} - ${errorInfo}`);

        if (response.status === 401 && messages.unauthorized) {
            throw new Error(messages.unauthorized);
        }
        throw new Error(`${messages.httpThrow} (${response.status}): ${errorInfo}`);
    }

    try {
        return await response.json();
    } catch (parseError) {
        console.error(messages.parseConsole, parseError);
        throw new Error(messages.parseThrow);
    }
}

/**
 * Extracts the most specific error description from a failed response:
 * a message field of a JSON body, else the raw body, else the HTTP status.
 *
 * @param {Response} response - A response whose `ok` flag is false.
 * @param {string} consoleLabel - Label used when logging the raw body.
 * @returns {Promise<string>} Human-readable error description.
 */
async function readMistralErrorInfo(response, consoleLabel) {
    const httpStatus = `HTTP错误: ${response.status} ${response.statusText}`;

    let responseText;
    try {
        responseText = await response.text();
    } catch (readError) {
        return httpStatus;
    }
    console.error(consoleLabel, responseText);

    try {
        const parsed = JSON.parse(responseText);
        const detail = parsed?.error?.message || parsed?.message || parsed?.detail || responseText;
        // `detail` can be structured; keep it readable instead of "[object Object]"
        return typeof detail === 'string' ? detail : JSON.stringify(detail);
    } catch (parseError) {
        return responseText || httpStatus;
    }
}

/**
 * Converts an OCR response into the module-level `markdownContent` and
 * `imagesData`.
 *
 * Every image of every page is recorded as `{ id, data }`, and each
 * `![id](id)` reference in the page Markdown is rewritten to point at
 * `images/<id>.png`. Pages are concatenated, each followed by a blank line.
 *
 * @param {{pages: Array<{markdown?: string, images?: Array<{id: string, image_base64: string}>}>}} ocrResponse
 *   OCR response; only the fields listed here are read.
 * @returns {Promise<boolean>} Resolves to `true` on success.
 * @throws {Error} Wrapping any failure while processing the response.
 */
async function processOcrResults(ocrResponse) {
    try {
        markdownContent = '';
        imagesData = [];

        for (const page of ocrResponse.pages) {
            let pageMarkdown = page.markdown ?? '';

            // A page without pictures may omit the images list entirely
            for (const image of page.images ?? []) {
                const imageId = image.id;
                imagesData.push({
                    id: imageId,
                    data: image.image_base64
                });

                // Plain-string replacement: image IDs contain regex
                // metacharacters such as '.', so they must not be used as a pattern.
                const originalReference = `![${imageId}](${imageId})`;
                const localReference = `![${imageId}](images/${imageId}.png)`;
                pageMarkdown = pageMarkdown.split(originalReference).join(localReference);
            }

            markdownContent += pageMarkdown + '\n\n';
        }

        return true;
    } catch (error) {
        console.error('处理OCR结果错误:', error);
        throw new Error('处理OCR结果失败: ' + error.message);
    }
}

/**
 * Translates Markdown text with the selected translation backend.
 *
 * Every argument is optional; a missing one falls back to the current page
 * state (`markdownContent`, or the `targetLanguage`, `translationModel` and
 * `translationApiKey` form fields).
 *
 * @param {string} [markdownText] - Markdown to translate.
 * @param {string} [targetLang] - Target language; 'chinese' and 'zh' select
 *   translation into Chinese, any other value translation into English.
 * @param {string} [model] - Backend key: 'deepseek', 'gemini', 'claude',
 *   'mistral', 'tongyi-deepseek-v3', 'volcano-deepseek-v3', 'custom', or
 *   'none' (returns the input untouched; no API key required).
 * @param {string} [apiKey] - API key for the chosen backend.
 * @returns {Promise<string>} The translated Markdown.
 * @throws {Error} `调用<model>翻译API失败: <reason>` for any failure.
 */
async function translateMarkdown(markdownText, targetLang, model, apiKey) {
    // Declared outside the try so the final error can name the resolved model
    let selectedModel = model;

    try {
        const content = markdownText || markdownContent;
        const lang = targetLang || document.getElementById('targetLanguage').value;
        selectedModel = model || document.getElementById('translationModel').value;
        const key = apiKey || document.getElementById('translationApiKey').value.trim();

        if (!content) {
            throw new Error('没有要翻译的内容');
        }

        // Checked before the key: "no translation" needs no credentials
        if (selectedModel === 'none') {
            return content;
        }

        if (!key) {
            throw new Error('未提供API密钥');
        }

        const prompt = buildTranslationPrompt(content, lang);
        const temperature = 0.5;
        // NOTE(review): this output cap is sent unchanged to every backend;
        // verify it against each provider's documented maximum.
        const maxTokens = 100000;
        const systemPrompt = "你是一个专业的文档翻译助手,擅长保持原文档格式进行精确翻译。";

        // Request bodies and response readers shared by several backends
        const chatMessages = () => [
            { role: "system", content: systemPrompt },
            { role: "user", content: prompt }
        ];
        const openAiBody = (modelId) => ({
            model: modelId,
            messages: chatMessages(),
            temperature: temperature,
            max_tokens: maxTokens
        });
        const anthropicBody = (modelId) => ({
            model: modelId,
            max_tokens: maxTokens,
            messages: [
                { role: "user", content: prompt }
            ]
        });
        const geminiBody = () => ({
            contents: [
                {
                    role: "user",
                    parts: [{ text: prompt }]
                }
            ],
            generationConfig: {
                temperature: temperature,
                maxOutputTokens: maxTokens
            }
        });
        const readOpenAi = (data) => data.choices[0].message.content;
        const readAnthropic = (data) => data.content[0].text;
        const readGemini = (data) => data.candidates[0].content.parts[0].text;

        const bearerHeaders = () => ({
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${key}`
        });
        const anthropicHeaders = () => ({
            'Content-Type': 'application/json',
            'x-api-key': key,
            'anthropic-version': '2023-06-01'
        });

        const presetConfigs = {
            'deepseek': {
                endpoint: 'https://api.deepseek.com/v1/chat/completions',
                modelName: 'DeepSeek v3 (deepseek-v3)',
                headers: bearerHeaders(),
                bodyBuilder: () => openAiBody("deepseek-v3"),
                responseExtractor: readOpenAi
            },
            'gemini': {
                // NOTE(review): the URL targets gemini-1.5-flash while the
                // display name says 2.0 Flash — confirm which is intended.
                endpoint: `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${key}`,
                modelName: 'Google Gemini 2.0 Flash',
                headers: { 'Content-Type': 'application/json' },
                bodyBuilder: geminiBody,
                responseExtractor: readGemini
            },
            'claude': {
                endpoint: 'https://api.anthropic.com/v1/messages',
                modelName: 'Claude 3.5 Sonnet',
                headers: anthropicHeaders(),
                bodyBuilder: () => anthropicBody("claude-3-5-sonnet"),
                responseExtractor: readAnthropic
            },
            'mistral': {
                endpoint: 'https://api.mistral.ai/v1/chat/completions',
                modelName: 'Mistral Large (mistral-large-latest)',
                headers: bearerHeaders(),
                bodyBuilder: () => openAiBody("mistral-large-latest"),
                responseExtractor: readOpenAi
            },
            'tongyi-deepseek-v3': {
                endpoint: 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
                modelName: '阿里云通义百炼 DeepSeek v3',
                headers: bearerHeaders(),
                bodyBuilder: () => openAiBody("deepseek-v3"),
                responseExtractor: readOpenAi
            },
            'volcano-deepseek-v3': {
                endpoint: 'https://api.volcengine.com/ml/api/v1/open/llm/inference',
                modelName: '火山引擎 DeepSeek v3',
                headers: bearerHeaders(),
                // This body nests the sampling options under `parameters`
                bodyBuilder: () => ({
                    model: "deepseek-v3",
                    messages: chatMessages(),
                    parameters: {
                        temperature: temperature,
                        max_tokens: maxTokens
                    }
                }),
                responseExtractor: readOpenAi
            }
        };

        // Builds the configuration for a user-defined endpoint from the form
        const buildCustomConfig = () => {
            const customModelName = document.getElementById('customModelName').value.trim();
            const customApiEndpoint = document.getElementById('customApiEndpoint').value.trim();
            const customModelId = document.getElementById('customModelId').value.trim();
            const customRequestFormat = document.getElementById('customRequestFormat').value;

            if (!customModelName || !customApiEndpoint || !customModelId) {
                throw new Error('请填写完整的自定义模型信息');
            }

            // Endpoints containing "anthropic" get x-api-key auth; all others a bearer token
            const config = {
                endpoint: customApiEndpoint,
                modelName: customModelName,
                headers: customApiEndpoint.includes('anthropic') ? anthropicHeaders() : bearerHeaders()
            };

            switch (customRequestFormat) {
                case 'openai':
                    config.bodyBuilder = () => openAiBody(customModelId);
                    config.responseExtractor = readOpenAi;
                    break;
                case 'anthropic':
                    config.bodyBuilder = () => anthropicBody(customModelId);
                    config.responseExtractor = readAnthropic;
                    break;
                case 'gemini':
                    config.bodyBuilder = geminiBody;
                    config.responseExtractor = readGemini;
                    break;
                default:
                    // Fail with a clear message rather than a TypeError later
                    throw new Error(`不支持的自定义请求格式: ${customRequestFormat}`);
            }

            return config;
        };

        let apiConfig;
        if (selectedModel === 'custom') {
            // Resolved once and reused for both the request and the response
            apiConfig = buildCustomConfig();
        } else if (Object.prototype.hasOwnProperty.call(presetConfigs, selectedModel)) {
            apiConfig = presetConfigs[selectedModel];
        } else {
            throw new Error(`不支持的翻译模型: ${selectedModel}`);
        }

        addProgressLog(`正在调用${apiConfig.modelName || selectedModel}翻译API...`);

        const response = await fetch(apiConfig.endpoint, {
            method: 'POST',
            headers: apiConfig.headers,
            body: JSON.stringify(apiConfig.bodyBuilder())
        });

        if (!response.ok) {
            // A response body can be read only once: take the text, then try
            // to parse it, instead of calling json() and text() in turn.
            let errorText = '';
            try {
                errorText = await response.text();
            } catch (readError) {
                errorText = '';
            }
            try {
                errorText = JSON.stringify(JSON.parse(errorText));
            } catch (parseError) {
                // Not JSON: keep the raw body
            }
            if (!errorText) {
                errorText = response.statusText || '';
            }

            console.error(`API错误 (${response.status}): ${errorText}`);
            throw new Error(`翻译API返回错误 (${response.status}): ${errorText.substring(0, 200)}`);
        }

        const data = await response.json();

        let translatedContent;
        try {
            // Inside the try so an unexpected response shape is reported as
            // an extraction failure together with the raw response.
            translatedContent = apiConfig.responseExtractor(data);
            if (!translatedContent) {
                throw new Error('译文为空');
            }
        } catch (error) {
            console.error('提取翻译结果错误:', error, '原始响应:', data);
            throw new Error(`提取翻译结果失败: ${error.message}`);
        }

        return translatedContent;
    } catch (error) {
        console.error('翻译错误:', error);
        throw new Error(`调用${selectedModel}翻译API失败: ${error.message}`, { cause: error });
    }
}

/**
 * Builds the user prompt sent to the translation model.
 *
 * NOTE(review): only two directions exist — into Chinese (source described
 * as English) for 'chinese'/'zh', and into English (source described as
 * Chinese) for every other value. Confirm this matches the language list.
 *
 * @param {string} content - Markdown to translate.
 * @param {string} lang - Target language value.
 * @returns {string} The complete prompt, ending with `content`.
 */
function buildTranslationPrompt(content, lang) {
    const actualLang = lang === 'chinese' ? 'zh' : lang;
    const toChinese = actualLang === 'zh';
    const sourceName = toChinese ? '英文' : '中文';
    const targetName = toChinese ? '中文' : '英文';

    return [
        `请将以下${sourceName}内容翻译为${targetName},`,
        '要求:',
        '',
        '1. 保持所有Markdown语法元素不变(如#标题, *斜体*, **粗体**, [链接](), ![图片]()等)',
        '',
        '2. 学术/专业术语应准确翻译,必要时可保留英文原文在括号中',
        '',
        '3. 保持原文的段落结构和格式',
        '',
        '4. 仅翻译内容,不要添加额外解释',
        '',
        '5. 对于行间公式,使用:',
        '$$',
        '...',
        '$$',
        '标记',
        '',
        '文档内容:',
        '',
        content
    ].join('\n');
}

/**
 * Translates a long document chunk by chunk and joins the results.
 *
 * The text is split with `splitMarkdownIntoChunks` and the chunks are
 * translated one after another. A chunk that fails does not abort the run:
 * an error note followed by the untranslated chunk is inserted instead.
 *
 * @param {string} markdownText - Full Markdown document.
 * @param {string} targetLang - Target language, passed to translateMarkdown.
 * @param {string} model - Translation backend, passed to translateMarkdown.
 * @param {string} apiKey - API key, passed to translateMarkdown.
 * @param {number} [delayMs=1000] - Pause after each successfully translated
 *   chunk except the last; 0 disables it.
 * @returns {Promise<string>} The assembled translation.
 */
async function translateLongDocument(markdownText, targetLang, model, apiKey, delayMs = 1000) {
    const parts = splitMarkdownIntoChunks(markdownText);
    console.log(`将文档分割为${parts.length}个部分进行翻译`);
    addProgressLog(`文档被分割为${parts.length}个部分进行翻译`);

    let translatedContent = '';

    for (let i = 0; i < parts.length; i++) {
        const isLastPart = i === parts.length - 1;

        // Translation occupies the 60 %–90 % range of the progress bar
        updateProgress(`翻译第 ${i+1}/${parts.length} 部分...`, 60 + Math.floor((i / parts.length) * 30));
        addProgressLog(`正在翻译第 ${i+1}/${parts.length} 部分...`);

        try {
            const partResult = await translateMarkdown(parts[i], targetLang, model, apiKey);
            translatedContent += partResult;

            if (!isLastPart) {
                translatedContent += '\n\n';

                // Space out consecutive API calls
                if (delayMs > 0) {
                    await new Promise(resolve => setTimeout(resolve, delayMs));
                }
            }
        } catch (error) {
            console.error(`第 ${i+1} 部分翻译失败:`, error);
            addProgressLog(`第 ${i+1} 部分翻译失败: ${error.message}`);

            // Keep the original text so no content is lost
            translatedContent += `\n\n> **翻译错误 (第 ${i+1} 部分), 使用原语言**: ${error.message}\n\n${parts[i]}\n\n`;
        }
    }

    return translatedContent;
}

/**
 * Splits Markdown into chunks that each stay within the token budget set by
 * the `maxTokensPerChunk` control (2000 when that value is not a positive
 * number).
 *
 * Lines are accumulated until the next one would exceed the budget. A
 * heading outside a fenced code block also starts a new chunk once the
 * current one is more than 70 % full, so sections tend to stay together.
 * A chunk that is still over budget is divided further by
 * `splitByParagraphs`.
 *
 * @param {string} markdown - Markdown text to split.
 * @returns {string[]} Chunks in document order; a document within the
 *   budget is returned as a single chunk.
 */
function splitMarkdownIntoChunks(markdown) {
    const configuredLimit = Number.parseInt(maxTokensPerChunk.value, 10);
    const tokenLimit = configuredLimit > 0 ? configuredLimit : 2000;

    if (estimateTokenCount(markdown) <= tokenLimit) {
        return [markdown];
    }

    const headingRegex = /^#{1,6}\s+.+$/;
    const chunks = [];
    let currentChunk = [];
    let currentTokenCount = 0;
    let inCodeBlock = false;

    for (const line of markdown.split('\n')) {
        // Track fenced code blocks: '#' lines inside them are not headings
        if (line.trim().startsWith('```')) {
            inCodeBlock = !inCodeBlock;
        }

        const lineTokens = estimateTokenCount(line);
        const isHeading = headingRegex.test(line) && !inCodeBlock;
        const wouldExceedLimit = currentTokenCount + lineTokens > tokenLimit;
        const headingBreak = isHeading && currentTokenCount > tokenLimit * 0.7;

        // Close the current chunk before a line that would overflow it, or
        // before a heading when the chunk is already mostly full
        if (currentChunk.length > 0 && (wouldExceedLimit || headingBreak)) {
            chunks.push(currentChunk.join('\n'));
            currentChunk = [];
            currentTokenCount = 0;
        }

        currentChunk.push(line);
        currentTokenCount += lineTokens;
    }

    if (currentChunk.length > 0) {
        chunks.push(currentChunk.join('\n'));
    }

    // A single over-long line can still leave a chunk over budget
    const finalChunks = [];
    for (const chunk of chunks) {
        if (estimateTokenCount(chunk) > tokenLimit) {
            finalChunks.push(...splitByParagraphs(chunk, tokenLimit));
        } else {
            finalChunks.push(chunk);
        }
    }

    return finalChunks;
}

/**
 * Splits text into chunks along blank-line paragraph boundaries.
 *
 * Paragraphs (separated by '\n\n') are packed greedily into chunks whose
 * estimated token total stays within `tokenLimit`. A paragraph that is over
 * the limit on its own is delegated to `splitBySentences`, and the chunks it
 * returns are appended directly.
 *
 * @param {string} text - Text to split.
 * @param {number} tokenLimit - Maximum estimated tokens per chunk.
 * @returns {string[]} Chunks in original order.
 */
function splitByParagraphs(text, tokenLimit) {
    const result = [];
    let pending = [];
    let pendingTokens = 0;

    // Emits the paragraphs collected so far (if any) as one chunk.
    const flushPending = () => {
        if (pending.length === 0) {
            return;
        }
        result.push(pending.join('\n\n'));
        pending = [];
        pendingTokens = 0;
    };

    text.split('\n\n').forEach((block) => {
        const blockTokens = estimateTokenCount(block);

        if (blockTokens > tokenLimit) {
            // Too large for any chunk: close what we have and split it further.
            flushPending();
            result.push(...splitBySentences(block, tokenLimit));
            return;
        }

        if (pendingTokens + blockTokens > tokenLimit) {
            flushPending();
        }

        pending.push(block);
        pendingTokens += blockTokens;
    });

    flushPending();
    return result;
}
|
|
|
|
|
/**
 * Splits an oversized paragraph into chunks along sentence and line
 * boundaries.
 *
 * The paragraph is cut into segments that each keep their own trailing
 * whitespace, and segments are packed greedily into chunks whose estimated
 * token total stays within `tokenLimit`. Because segments are concatenated
 * unchanged, the text inside a chunk is an exact substring of the input:
 * line breaks (tables, lists) survive, no spaces are inserted between CJK
 * sentences, and tokens such as "3.14" or "example.com" are not rewritten.
 *
 * Boundaries are:
 *   - a run of CJK terminators (。 ! ?);
 *   - a run of ASCII terminators (. ! ?) that is NOT directly followed by an
 *     ASCII letter, digit or another terminator, so decimals, domain names
 *     and abbreviations such as "e.g." stay in one piece;
 *   - one or more newlines.
 *
 * Limitation: a single segment that is over the limit by itself is still
 * emitted as one (oversized) chunk.
 *
 * @param {string} paragraph - Paragraph text to split.
 * @param {number} tokenLimit - Maximum estimated tokens per chunk.
 * @returns {string[]} Non-empty chunks in original order, without trailing
 *   whitespace.
 */
function splitBySentences(paragraph, tokenLimit) {
    if (!paragraph) {
        return [];
    }

    // Every alternative consumes at least one character, so exec() always advances.
    const boundary = /(?:[\u3002\uFF01\uFF1F]+|[.!?]+(?![0-9A-Za-z.!?]))\s*|\n+/g;

    const segments = [];
    let cursor = 0;
    let match;
    while ((match = boundary.exec(paragraph)) !== null) {
        const end = match.index + match[0].length;
        segments.push(paragraph.slice(cursor, end));
        cursor = end;
    }
    if (cursor < paragraph.length) {
        segments.push(paragraph.slice(cursor));
    }

    const chunks = [];
    let pending = '';
    let pendingTokens = 0;

    // Emits the collected text as a chunk, dropping whitespace-only results.
    const flushPending = () => {
        const chunk = pending.replace(/\s+$/, '');
        if (chunk.trim()) {
            chunks.push(chunk);
        }
        pending = '';
        pendingTokens = 0;
    };

    for (const segment of segments) {
        const segmentTokens = estimateTokenCount(segment);

        if (pending && pendingTokens + segmentTokens > tokenLimit) {
            flushPending();
        }

        pending += segment;
        pendingTokens += segmentTokens;
    }

    flushPending();
    return chunks;
}
|
|
|
|
|
/**
 * Joins translated chunks back into one document, separating consecutive
 * chunks with a blank line.
 *
 * @param {string[]} chunks - Translated chunks in document order.
 * @returns {string} The merged text.
 */
function mergeTranslatedChunks(chunks) {
    const paragraphBreak = '\n\n';
    return chunks.join(paragraphBreak);
}
|
|
|
|
|
/**
 * Roughly estimates how many tokens a piece of text will consume.
 *
 * Text in which more than half of the characters fall in U+4E00–U+9FA5 is
 * counted at 1.5 tokens per character; all other text is counted at one
 * token per 3.75 characters. The result is rounded up.
 *
 * @param {string} text - Text to measure; any falsy value counts as 0.
 * @returns {number} Estimated token count.
 */
function estimateTokenCount(text) {
    if (!text) {
        return 0;
    }

    const totalChars = text.length;
    // Removing everything outside the range leaves exactly the characters inside it.
    const chineseChars = text.replace(/[^\u4e00-\u9fa5]/g, '').length;
    const mostlyChinese = chineseChars / totalChars > 0.5;

    const estimate = mostlyChinese ? totalChars * 1.5 : totalChars / 3.75;
    return Math.ceil(estimate);
}
|
|
|
|
|
/**
 * Reads a file through `FileReader.readAsDataURL` and exposes the outcome as
 * a Promise.
 *
 * @param {Blob} file - The file to read.
 * @returns {Promise<string>} Resolves with `reader.result` once loading
 *   completes; rejects with whatever the reader passes to its `onerror`
 *   handler.
 */
function fileToBase64(file) {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();

        reader.onerror = (failure) => {
            reject(failure);
        };
        reader.onload = () => {
            resolve(reader.result);
        };

        reader.readAsDataURL(file);
    });
}
|
|
|
|
|
/**
 * Offers a piece of text to the user as a downloadable Markdown file.
 *
 * The text is wrapped in a Blob with MIME type `text/markdown` and passed to
 * `saveAs` together with the requested file name.
 *
 * @param {string} content - Text to download.
 * @param {string} filename - Name passed to `saveAs` for the download.
 */
function downloadText(content, filename) {
    saveAs(new Blob([content], { type: 'text/markdown' }), filename);
}
|
|
|
|
|
/**
 * Packages the extracted Markdown and its images into a ZIP archive and
 * hands it to `saveAs` for download.
 *
 * The archive contains `document.md` (the current `markdownContent`) and an
 * `images/` folder holding one `<id>.png` entry per item in `imagesData`.
 * The archive is named after the uploaded PDF (without its extension), or
 * `document` when no PDF is selected. Any failure is logged and reported via
 * `showNotification`; the function itself does not throw.
 *
 * @returns {Promise<void>}
 */
async function downloadMarkdownWithImages() {
    try {
        const zip = new JSZip();

        zip.file('document.md', markdownContent);

        const imagesFolder = zip.folder('images');

        for (const img of imagesData) {
            // Keep only the part after the first comma; presumably img.data is a
            // "data:<mime>;base64,<payload>" URL — confirm against the producer.
            const imgData = img.data.split(',')[1];
            imagesFolder.file(`${img.id}.png`, imgData, { base64: true });
        }

        const zipBlob = await zip.generateAsync({ type: 'blob' });

        // Strip only a trailing ".pdf" extension, in any letter case. A plain
        // string replace would remove the first ".pdf" anywhere in the name and
        // would miss ".PDF".
        const pdfName = pdfFile ? pdfFile.name.replace(/\.pdf$/i, '') : 'document';
        saveAs(zipBlob, `${pdfName}_markdown.zip`);
    } catch (error) {
        console.error('创建ZIP文件失败:', error);
        showNotification('下载失败: ' + error.message, 'error');
    }
}
|
|
|
/**
 * Packages the translated Markdown and the document images into a ZIP
 * archive and hands it to `saveAs` for download.
 *
 * `document.md` holds `translationContent` wrapped in a dated header notice
 * and a disclaimer footer; `images/` holds one `<id>.png` entry per item in
 * `imagesData`. The archive is named after the uploaded PDF (without its
 * extension), or `document` when no PDF is selected. Any failure is logged
 * and reported via `showNotification`; the function itself does not throw.
 *
 * Declared as a function (rather than assigned to an undeclared identifier)
 * so that it does not rely on implicit-global creation, which throws a
 * ReferenceError in strict mode and ES modules.
 * NOTE(review): assumes no `let`/`const downloadTranslationWithImages` exists
 * elsewhere in this file — confirm.
 *
 * @returns {Promise<void>}
 */
async function downloadTranslationWithImages() {
    try {
        const zip = new JSZip();

        // ISO date (YYYY-MM-DD, UTC) shown in the header notice.
        const currentDate = new Date().toISOString().split('T')[0];
        const headerDeclaration = `> *本文档由 Paper Burner 工具制作 (${currentDate})。内容由 AI 大模型翻译生成,不保证翻译内容的准确性和完整性。*\n\n`;
        const footerDeclaration = `\n\n---\n> *免责声明:本文档内容由大模型API自动翻译生成,Paper Burner 工具不对翻译内容的准确性、完整性和合法性负责。*`;

        const contentToDownload = headerDeclaration + translationContent + footerDeclaration;
        zip.file('document.md', contentToDownload);

        const imagesFolder = zip.folder('images');

        for (const img of imagesData) {
            // Keep only the part after the first comma; presumably img.data is a
            // "data:<mime>;base64,<payload>" URL — confirm against the producer.
            const imgData = img.data.split(',')[1];
            imagesFolder.file(`${img.id}.png`, imgData, { base64: true });
        }

        const zipBlob = await zip.generateAsync({ type: 'blob' });

        // Strip only a trailing ".pdf" extension, in any letter case. A plain
        // string replace would remove the first ".pdf" anywhere in the name and
        // would miss ".PDF".
        const pdfName = pdfFile ? pdfFile.name.replace(/\.pdf$/i, '') : 'document';
        saveAs(zipBlob, `${pdfName}_translation.zip`);
    } catch (error) {
        console.error('创建ZIP文件失败:', error);
        showNotification('下载失败: ' + error.message, 'error');
    }
}
|
|
|
|
|
/**
 * Persists the current UI settings to `localStorage`.
 *
 * Writes, in this order:
 *   - `advancedSettings`: JSON with the current `maxTokensPerChunk` value;
 *   - `customModelSettings`: JSON with the custom model form fields, only
 *     when the selected translation model is `custom`;
 *   - `selectedTranslationModel`: the selected model value.
 */
function saveSettings() {
    const advancedPayload = { maxTokensPerChunk: maxTokensPerChunk.value };
    localStorage.setItem('advancedSettings', JSON.stringify(advancedPayload));

    const chosenModel = translationModelSelect.value;

    if (chosenModel === 'custom') {
        const readField = (elementId) => document.getElementById(elementId).value;
        const customPayload = {
            modelName: readField('customModelName'),
            apiEndpoint: readField('customApiEndpoint'),
            modelId: readField('customModelId'),
            requestFormat: readField('customRequestFormat'),
        };
        localStorage.setItem('customModelSettings', JSON.stringify(customPayload));
    }

    localStorage.setItem('selectedTranslationModel', chosenModel);
}
|
|
|
|
|
/**
 * Restores persisted settings from `localStorage` into the UI.
 *
 * Three independent sections are restored — the advanced settings, the
 * custom model form, and the selected translation model. Each section is
 * guarded separately, so a failure in one (for example malformed JSON) is
 * logged with `console.error` and does not prevent the others from loading.
 */
function loadSettings() {
    // Runs one restore step and logs, rather than propagates, any error.
    const guarded = (failureLabel, restore) => {
        try {
            restore();
        } catch (e) {
            console.error(failureLabel, e);
        }
    };

    guarded('加载高级设置失败:', () => {
        const raw = localStorage.getItem('advancedSettings');
        if (!raw) {
            return;
        }
        const saved = JSON.parse(raw);
        if (saved.maxTokensPerChunk) {
            // Keep the input and its displayed value in sync.
            maxTokensPerChunk.value = saved.maxTokensPerChunk;
            maxTokensPerChunkValue.textContent = saved.maxTokensPerChunk;
        }
    });

    guarded('加载自定义模型设置失败:', () => {
        const raw = localStorage.getItem('customModelSettings');
        if (!raw) {
            return;
        }
        const saved = JSON.parse(raw);
        const fillField = (elementId, value) => {
            document.getElementById(elementId).value = value;
        };
        fillField('customModelName', saved.modelName || '');
        fillField('customApiEndpoint', saved.apiEndpoint || '');
        fillField('customModelId', saved.modelId || '');
        fillField('customRequestFormat', saved.requestFormat || 'openai');
    });

    guarded('加载选中的翻译模型失败:', () => {
        const savedModel = localStorage.getItem('selectedTranslationModel');
        if (!savedModel) {
            return;
        }
        translationModelSelect.value = savedModel;
        if (savedModel === 'custom') {
            // The custom model form is only shown for the custom model.
            customModelSettings.classList.remove('hidden');
        }
    });
}
|
|