Spaces:
Running
Running
<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Python Code Crawler</title>
  <meta name="description" content="Discover and extract Python code from websites automatically.">
  <!-- Third-party assets: Tailwind (utility classes) and Font Awesome (icons). -->
  <script src="https://cdn.tailwindcss.com"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
  <style>
    /* Purple/blue banner behind the page header. */
    .gradient-bg {
      background: linear-gradient(135deg, #6e8efb, #a777e3);
    }

    /* Dark card that holds an extracted code snippet. */
    .code-block {
      font-family: 'Courier New', monospace;
      background-color: #2d3748;
      color: #f7fafc;
      border-radius: 0.5rem;
      transition: all 0.3s ease;
    }

    .code-block:hover {
      transform: translateY(-2px);
      box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
    }

    /* Entrance animation used by result cards and alerts. */
    .fade-in {
      animation: fadeIn 0.5s ease-in-out;
    }

    @keyframes fadeIn {
      from { opacity: 0; transform: translateY(10px); }
      to { opacity: 1; transform: translateY(0); }
    }

    /* Smooths width changes of the crawl progress bar. */
    .progress-bar {
      transition: width 0.3s ease;
    }

    /* Respect the user's request for reduced motion. */
    @media (prefers-reduced-motion: reduce) {
      .code-block,
      .progress-bar {
        transition: none;
      }

      .code-block:hover {
        transform: none;
      }

      .fade-in {
        animation: none;
      }
    }
  </style>
</head>
<body class="min-h-screen bg-gray-100">
<!-- Page header: title, tagline and the controls that start a crawl.
     The ids targetUrl, startCrawl and usePattern are looked up by the inline script. -->
<header class="gradient-bg text-white py-12 px-4 sm:px-6 lg:px-8">
  <div class="max-w-4xl mx-auto text-center">
    <div class="flex justify-center mb-6">
      <div class="bg-white bg-opacity-20 p-4 rounded-full">
        <i class="fas fa-spider text-4xl" aria-hidden="true"></i>
      </div>
    </div>
    <h1 class="text-4xl font-bold mb-4">Python Code Crawler</h1>
    <p class="text-xl opacity-90 mb-8">Discover and extract Python code from websites automatically</p>
    <div class="bg-white bg-opacity-20 backdrop-blur-sm rounded-xl p-6 shadow-lg">
      <div class="flex flex-col sm:flex-row gap-4">
        <!-- The placeholder alone is not an accessible name, so a visually hidden label is provided. -->
        <label class="sr-only" for="targetUrl">Website URL</label>
        <input class="flex-grow px-4 py-3 rounded-lg bg-white bg-opacity-90 text-gray-800 focus:outline-none focus:ring-2 focus:ring-purple-300"
               id="targetUrl" type="url" inputmode="url" autocomplete="url"
               placeholder="Enter website URL (e.g., https://example.com)">
        <button class="px-6 py-3 bg-white text-purple-600 font-semibold rounded-lg hover:bg-opacity-90 transition flex items-center justify-center gap-2"
                id="startCrawl" type="button">
          <i class="fas fa-play" aria-hidden="true"></i> Start Crawling
        </button>
      </div>
      <div class="mt-4 text-left">
        <label class="inline-flex items-center">
          <input class="form-checkbox h-5 w-5 text-purple-600" id="usePattern" type="checkbox">
          <span class="ml-2">Use URL pattern (e.g., https://example.com/docs/*)</span>
        </label>
      </div>
    </div>
  </div>
</header>
<!-- Primary content: extracted results followed by the crawl status panel.
     All ids below are looked up by the inline script and must stay unchanged. -->
<main class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
  <div id="resultsContainer" class="space-y-8">
    <!-- Results will be added here dynamically -->
  </div>
  <section id="statusPanel" class="hidden bg-white rounded-xl shadow-md p-6 mt-8" aria-labelledby="statusHeading">
    <div class="flex justify-between items-center mb-4">
      <h2 id="statusHeading" class="text-xl font-semibold text-gray-800">Crawling Progress</h2>
      <button class="px-4 py-2 bg-red-500 text-white rounded-lg hover:bg-red-600 transition flex items-center gap-2"
              id="stopCrawl" type="button">
        <i class="fas fa-stop" aria-hidden="true"></i> Stop
      </button>
    </div>
    <div class="mb-4">
      <div class="flex justify-between text-sm text-gray-600 mb-1">
        <span>Visited: <span id="visitedCount">0</span> pages</span>
        <span>Found: <span id="codeBlocksCount">0</span> code blocks</span>
      </div>
      <div class="w-full bg-gray-200 rounded-full h-2.5">
        <div id="progressBar" class="progress-bar bg-purple-600 h-2.5 rounded-full" style="width: 0%"></div>
      </div>
    </div>
    <!-- This wrapper, not #activityLog, is the element that scrolls. -->
    <div class="bg-gray-50 rounded-lg p-4 max-h-60 overflow-y-auto">
      <h3 class="font-medium text-gray-700 mb-2">Activity Log:</h3>
      <!-- role="log" makes appended entries a polite live region for screen readers. -->
      <div id="activityLog" class="space-y-2 text-sm" role="log">
        <!-- Log messages will appear here -->
      </div>
    </div>
  </section>
</main>
<!-- Full-screen overlay shown while the crawler "starts up".
     The script toggles the "hidden" class, rewrites #loadingMessage and sets the width of #loadingProgress. -->
<div id="loadingOverlay" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50 hidden">
  <div class="bg-white rounded-xl p-8 max-w-md w-full mx-4 text-center">
    <div class="animate-spin rounded-full h-16 w-16 border-t-2 border-b-2 border-purple-500 mx-auto mb-4" aria-hidden="true"></div>
    <h2 class="text-xl font-semibold mb-2">Processing Website</h2>
    <!-- role="status" lets assistive technology announce each change of the message text. -->
    <p class="text-gray-600 mb-4" id="loadingMessage" role="status">Initializing crawler...</p>
    <div class="w-full bg-gray-200 rounded-full h-2">
      <div id="loadingProgress" class="bg-purple-600 h-2 rounded-full" style="width: 0%"></div>
    </div>
  </div>
</div>
<script> | |
document.addEventListener('DOMContentLoaded', function() { | |
// Shorthand for the id lookups below.
const byId = (id) => document.getElementById(id);

// Controls in the page header.
const startCrawlBtn = byId('startCrawl');
const stopCrawlBtn = byId('stopCrawl');
const targetUrlInput = byId('targetUrl');
// Not referenced anywhere else in the visible script.
const usePatternCheckbox = byId('usePattern');

// Output areas.
const resultsContainer = byId('resultsContainer');
const statusPanel = byId('statusPanel');
const activityLog = byId('activityLog');

// Start-up overlay.
const loadingOverlay = byId('loadingOverlay');
const loadingMessage = byId('loadingMessage');
const loadingProgress = byId('loadingProgress');

// Live counters and progress bar inside the status panel.
const visitedCount = byId('visitedCount');
const codeBlocksCount = byId('codeBlocksCount');
const progressBar = byId('progressBar');

// Mutable state shared by the handlers and helpers in this listener.
let isCrawling = false;
let visitedPages = 0;
let foundCodeBlocks = 0;
// Simulate crawling (in a real app, this would connect to a backend).
// True while the start-up sequence below is awaiting its delays; used to
// ignore repeated activations of the Start button during that window.
let startupInProgress = false;

// Start button: reset the run state, play the simulated start-up sequence
// on the loading overlay, then hand over to simulateCrawling().
startCrawlBtn.addEventListener('click', async function() {
  if (startupInProgress) {
    return;
  }

  const url = targetUrlInput.value.trim();
  if (!url) {
    showAlert('Please enter a valid URL', 'error');
    return;
  }

  // Reset counters and clear everything a previous run left on screen,
  // including the progress bar, which would otherwise keep its old width.
  isCrawling = true;
  visitedPages = 0;
  foundCodeBlocks = 0;
  resultsContainer.innerHTML = '';
  activityLog.innerHTML = '';
  progressBar.style.width = '0%';
  updateStatus();

  // Show loading overlay
  loadingOverlay.classList.remove('hidden');
  statusPanel.classList.remove('hidden');

  startupInProgress = true;
  try {
    // Simulate initialization
    loadingMessage.textContent = 'Initializing crawler...';
    updateLoadingProgress(10);
    await delay(800);

    // Validate URL
    loadingMessage.textContent = 'Validating URL...';
    updateLoadingProgress(20);
    await delay(600);
    if (!isValidUrl(url)) {
      showAlert('Please enter a valid URL starting with http:// or https://', 'error');
      loadingOverlay.classList.add('hidden');
      // Nothing will be crawled, so do not leave an empty status panel behind.
      statusPanel.classList.add('hidden');
      isCrawling = false;
      return;
    }

    // Start crawling simulation
    loadingMessage.textContent = 'Starting crawl process...';
    updateLoadingProgress(30);
    await delay(500);
  } finally {
    startupInProgress = false;
  }

  // Stop was pressed while the start-up sequence was still running.
  if (!isCrawling) {
    loadingOverlay.classList.add('hidden');
    return;
  }

  // Hide loading overlay after some time
  setTimeout(() => {
    loadingOverlay.classList.add('hidden');
  }, 2000);

  // Simulate crawling pages
  simulateCrawling(url);
});
// Stop button: clear the crawling flag, log the stop, and confirm with an
// info alert one second later. Does nothing when no crawl is active.
stopCrawlBtn.addEventListener('click', function() {
  if (!isCrawling) {
    return;
  }

  isCrawling = false;
  addLogMessage('🚨 Crawling stopped by user. Saving collected data...');

  const confirmSaved = () => {
    showAlert('Crawling stopped. Collected data has been saved.', 'info');
  };
  setTimeout(confirmSaved, 1000);
});
/**
 * Simulates a crawl of eight pages below `baseUrl`, one page every 1.5 s.
 * This is just a simulation - in a real app, this would be API calls to a backend.
 *
 * Each tick updates the counters, the activity log, the progress bar and,
 * when the page "contains" code blocks, appends result cards. The run ends
 * when the last page is processed or when `isCrawling` becomes false.
 *
 * @param {string} baseUrl URL the simulated page addresses are derived from.
 */
function simulateCrawling(baseUrl) {
  const pagesToSimulate = 8;
  const codeBlocksPerPage = [0, 1, 2, 0, 3, 1, 0, 2]; // Varying number of code blocks
  // Drop trailing slashes so the page URLs do not contain "//page1".
  const pageRoot = baseUrl.replace(/\/+$/, '');
  let currentPage = 0;

  // Only one simulation may run at a time; a new run replaces the previous
  // one instead of letting both update the shared counters.
  if (simulateCrawling.activeTimer) {
    clearInterval(simulateCrawling.activeTimer);
  }

  const crawlInterval = setInterval(() => {
    if (!isCrawling) {
      finish();
      return;
    }

    currentPage++;
    visitedPages++;

    // Simulate finding code blocks; page 1 maps to the first table entry.
    const codeBlocksFound = codeBlocksPerPage[(currentPage - 1) % codeBlocksPerPage.length];
    foundCodeBlocks += codeBlocksFound;

    // Update UI
    updateStatus();
    addLogMessage(`🌐 Crawling page ${currentPage}/${pagesToSimulate}: ${pageRoot}/page${currentPage}`);
    if (codeBlocksFound > 0) {
      addLogMessage(`✅ Found ${codeBlocksFound} Python code blocks`);
      // Add simulated results
      for (let i = 0; i < codeBlocksFound; i++) {
        addResultToUI({
          url: `${pageRoot}/page${currentPage}#code-${i}`,
          code: generateRandomPythonCode(),
          context: `Example context for code block ${i + 1}`
        });
      }
    }

    // Update progress
    const progress = Math.min(100, (currentPage / pagesToSimulate) * 100);
    progressBar.style.width = `${progress}%`;

    // Report completion as soon as the last page is done, and clear the
    // flag so a later Stop click is not treated as an interruption.
    if (currentPage >= pagesToSimulate) {
      finish();
      isCrawling = false;
      addLogMessage('✅ Crawling completed successfully!');
      showAlert('Crawling completed! Results are displayed below.', 'success');
    }
  }, 1500);
  simulateCrawling.activeTimer = crawlInterval;

  // Stops this run's timer and forgets it if it is still the active one.
  function finish() {
    clearInterval(crawlInterval);
    if (simulateCrawling.activeTimer === crawlInterval) {
      simulateCrawling.activeTimer = null;
    }
  }
}
/**
 * Appends one result card (source URL, context line, code, Copy button)
 * to the results container.
 *
 * The URL, context and code are written with textContent, never as markup:
 * the URL is derived from what the user typed, so it must not be parsed as HTML.
 *
 * @param {{url: string, context: string, code: string}} result Data for the card.
 */
function addResultToUI(result) {
  const resultElement = document.createElement('div');
  resultElement.className = 'fade-in bg-white rounded-xl shadow-md overflow-hidden';
  // Static skeleton only; the dynamic text is filled in below.
  resultElement.innerHTML = `
    <div class="p-6">
      <div class="flex justify-between items-start mb-4">
        <div>
          <h3 class="text-lg font-semibold text-gray-800 mb-1"></h3>
          <p class="text-sm text-gray-500"></p>
        </div>
        <button type="button" class="copy-code-btn px-3 py-1 bg-gray-100 text-gray-700 rounded-lg text-sm hover:bg-gray-200 transition flex items-center gap-1">
          <i class="far fa-copy" aria-hidden="true"></i> Copy
        </button>
      </div>
      <div class="code-block p-4 overflow-x-auto">
        <pre class="text-sm"></pre>
      </div>
    </div>
  `;
  resultElement.querySelector('h3').textContent = result.url;
  resultElement.querySelector('p').textContent = result.context;
  resultElement.querySelector('pre').textContent = result.code;
  resultsContainer.appendChild(resultElement);

  // Add copy functionality
  const copyBtn = resultElement.querySelector('.copy-code-btn');
  // Captured once: reading the label inside the click handler would pick up
  // the temporary "Copied!" label on a quick second click and never restore it.
  const idleLabel = copyBtn.innerHTML;
  let restoreTimer = null;

  copyBtn.addEventListener('click', function() {
    // The async Clipboard API is missing outside secure contexts.
    if (!navigator.clipboard || !navigator.clipboard.writeText) {
      showAlert('Copying is not supported in this browser.', 'error');
      return;
    }

    navigator.clipboard.writeText(result.code).then(() => {
      copyBtn.innerHTML = '<i class="fas fa-check" aria-hidden="true"></i> Copied!';
      clearTimeout(restoreTimer);
      restoreTimer = setTimeout(() => {
        copyBtn.innerHTML = idleLabel;
      }, 2000);
    }).catch(() => {
      // Permission denied or the document lost focus.
      showAlert('Could not copy the code to the clipboard.', 'error');
    });
  });
}
/**
 * Appends a timestamped entry to the activity log and scrolls it into view.
 *
 * The message is inserted as plain text because log lines include the
 * user-supplied URL and must not be parsed as HTML.
 *
 * @param {string} message Text of the log entry.
 */
function addLogMessage(message) {
  const logEntry = document.createElement('div');
  logEntry.className = 'flex items-start gap-2';

  const timeLabel = document.createElement('span');
  timeLabel.className = 'text-gray-500 text-xs mt-0.5';
  timeLabel.textContent = new Date().toLocaleTimeString();

  const messageLabel = document.createElement('span');
  messageLabel.className = 'flex-1';
  messageLabel.textContent = message;

  logEntry.appendChild(timeLabel);
  logEntry.appendChild(messageLabel);
  activityLog.appendChild(logEntry);

  // The scrollable element is the wrapper carrying overflow-y-auto, not the
  // log itself; scrolling #activityLog alone never moved the view.
  const scroller = activityLog.closest('.overflow-y-auto') || activityLog;
  scroller.scrollTop = scroller.scrollHeight;
}
/** Writes the current page and code-block counters into the status panel. */
function updateStatus() {
  const counters = [
    [visitedCount, visitedPages],
    [codeBlocksCount, foundCodeBlocks]
  ];
  counters.forEach(([element, value]) => {
    element.textContent = value;
  });
}
/**
 * Sets the width of the overlay's progress bar.
 *
 * @param {number} percent Width to apply, as a percentage.
 */
function updateLoadingProgress(percent) {
  const width = String(percent).concat('%');
  loadingProgress.style.width = width;
}
/**
 * Shows a toast in the top-right corner. It fades out after five seconds
 * and can be dismissed earlier with its close button.
 *
 * @param {string} message Text to display; inserted as plain text.
 * @param {string} type    'error', 'success' or 'info'. Any other value is
 *                         rendered with the 'info' appearance.
 */
function showAlert(message, type) {
  const variants = {
    error: {
      box: 'bg-red-100 border-red-400 text-red-700',
      icon: 'fa-exclamation-circle',
      close: 'bg-red-100 text-red-500 hover:bg-red-200'
    },
    success: {
      box: 'bg-green-100 border-green-400 text-green-700',
      icon: 'fa-check-circle',
      close: 'bg-green-100 text-green-500 hover:bg-green-200'
    },
    info: {
      box: 'bg-blue-100 border-blue-400 text-blue-700',
      icon: 'fa-info-circle',
      close: 'bg-blue-100 text-blue-500 hover:bg-blue-200'
    }
  };
  // Fall back to 'info' for box, icon and button alike, so an unknown type
  // is still fully styled.
  const variant = Object.prototype.hasOwnProperty.call(variants, type) ? variants[type] : variants.info;

  const alertDiv = document.createElement('div');
  alertDiv.className = `fixed top-4 right-4 border-l-4 p-4 rounded shadow-lg ${variant.box} max-w-md z-50 fade-in`;
  // Errors interrupt assistive technology; other toasts are announced politely.
  alertDiv.setAttribute('role', type === 'error' ? 'alert' : 'status');
  // Static skeleton only; the message is set through textContent below.
  alertDiv.innerHTML = `
    <div class="flex items-center">
      <div class="flex-shrink-0">
        <i class="fas ${variant.icon}" aria-hidden="true"></i>
      </div>
      <div class="ml-3">
        <p class="text-sm"></p>
      </div>
      <button type="button" class="ml-auto -mx-1.5 -my-1.5 rounded-lg p-1.5 inline-flex h-8 w-8 focus:outline-none ${variant.close}">
        <span class="sr-only">Close</span>
        <i class="fas fa-times" aria-hidden="true"></i>
      </button>
    </div>
  `;
  alertDiv.querySelector('p').textContent = message;
  document.body.appendChild(alertDiv);

  // Auto-remove after 5 seconds
  const autoCloseTimer = setTimeout(() => {
    alertDiv.classList.remove('fade-in');
    alertDiv.classList.add('opacity-0', 'transition-opacity', 'duration-300');
    setTimeout(() => {
      alertDiv.remove();
    }, 300);
  }, 5000);

  // Manual close
  alertDiv.querySelector('button').addEventListener('click', function() {
    clearTimeout(autoCloseTimer);
    alertDiv.remove();
  });
}
// Helper functions | |
/**
 * Reports whether `url` is an absolute http:// or https:// URL.
 *
 * Parsing alone is not enough: the URL constructor also accepts schemes
 * such as javascript:, data: or ftp:, which the crawler must reject.
 *
 * @param {string} url Candidate URL.
 * @returns {boolean} True when the URL parses and uses http or https.
 */
function isValidUrl(url) {
  try {
    const parsed = new URL(url);
    return parsed.protocol === 'http:' || parsed.protocol === 'https:';
  } catch (e) {
    return false;
  }
}
/**
 * Returns a promise that resolves once `ms` milliseconds have elapsed.
 *
 * @param {number} ms Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise(function(wake) {
    setTimeout(wake, ms);
  });
}
/**
 * Escapes the five HTML-significant characters so the text can be placed
 * inside markup or a quoted attribute value.
 *
 * The ampersand is replaced first; otherwise the entities produced by the
 * later replacements would themselves be escaped again.
 *
 * @param {*} unsafe Value to escape; null and undefined become ''.
 * @returns {string} The escaped text.
 */
function escapeHtml(unsafe) {
  const text = unsafe == null ? '' : String(unsafe);
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');
}
/**
 * Returns one of six canned Python snippets, chosen at random, to stand in
 * for code a real crawler would have extracted.
 *
 * The snippets are shown inside a <pre>, so their whitespace is significant.
 * Each template literal therefore starts at column 0 and carries its own
 * four-space Python indentation.
 *
 * @returns {string} Python source text.
 */
function generateRandomPythonCode() {
  const codeSnippets = [
`def calculate_factorial(n):
    if n == 0:
        return 1
    else:
        return n * calculate_factorial(n-1)

print(calculate_factorial(5))`,
`class Animal:
    def __init__(self, name, species):
        self.name = name
        self.species = species

    def speak(self):
        return "Some generic sound"

class Dog(Animal):
    def speak(self):
        return "Woof!"`,
`import requests

def fetch_data(url):
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None`,
`async def process_data(data):
    results = []
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_item(session, item) for item in data]
        results = await asyncio.gather(*tasks)
    return [r for r in results if r is not None]`,
`@app.route('/api/users', methods=['GET'])
def get_users():
    users = User.query.all()
    return jsonify([user.to_dict() for user in users])`,
`def fibonacci(n):
    a, b = 0, 1
    for _ in range(n):
        yield a
        a, b = b, a + b

print(list(fibonacci(10)))`
  ];
  return codeSnippets[Math.floor(Math.random() * codeSnippets.length)];
}
}); | |
</script> | |
<!-- Attribution badge pinned to the bottom-left corner of the viewport. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=gewei20/crawler-ui" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">Remix</a></p>
</body>
</html>