#!/bin/bash

# Start Apache without backup support when the HuggingFace credentials are not configured.
if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
    echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
    exec apache2-foreground
    exit 0
fi
|
|
|
|
|
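# Passphrase used to derive the backup encryption key; a built-in default is used when not provided (override it in production).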
ENCRYPTION_KEY=${ENCRYPTION_KEY:-"a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6A7B8C9D0E1F2G3H4I5J6K7L8M9N0O1P2Q3R4S5T6U7V8W9X0Y1Z2"}
|
|
|
|
|
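# Activate the Python virtual environment (expected to provide huggingface_hub and cryptography).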
source /opt/venv/bin/activate
|
|
|
|
|
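# upload_backup <local_file> <remote_name>
# Encrypts the archive with a Fernet key derived from ENCRYPTION_KEY (PBKDF2-HMAC-SHA256),
# uploads it to the HuggingFace dataset as <remote_name>.enc, and prunes old backups.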
upload_backup() {
    local file_path="$1"
    local file_name="$2"
    local token="$HF_TOKEN"
    local repo_id="$DATASET_ID"
    local encryption_key="$ENCRYPTION_KEY"

    python3 -c "
from huggingface_hub import HfApi
import sys
import os
import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
import io

def generate_key(password, salt=b'lsky_pro_salt'):
    # Derive a Fernet key from the passphrase with PBKDF2-HMAC-SHA256.
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
    return key

def encrypt_file(file_path, key):
    # Encrypt the file and write it next to the original with a .enc suffix.
    f = Fernet(key)
    with open(file_path, 'rb') as file:
        file_data = file.read()
    encrypted_data = f.encrypt(file_data)
    encrypted_file_path = file_path + '.enc'
    with open(encrypted_file_path, 'wb') as file:
        file.write(encrypted_data)
    return encrypted_file_path

def manage_backups(api, repo_id, max_files=10):
    # Keep only the most recent backups in the dataset repository.
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('lsky_backup_') and f.endswith('.tar.gz.enc')]
    backup_files.sort()

    if len(backup_files) >= max_files:
        files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
        for file_to_delete in files_to_delete:
            try:
                api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                print(f'Error deleting {file_to_delete}: {str(e)}')

api = HfApi(token='$token')
try:
    # Generate the encryption key
    key = generate_key('$encryption_key')

    # Encrypt the file
    encrypted_file_path = encrypt_file('$file_path', key)

    # Upload the encrypted file
    api.upload_file(
        path_or_fileobj=encrypted_file_path,
        path_in_repo='$file_name.enc',
        repo_id='$repo_id',
        repo_type='dataset'
    )
    print(f'Successfully uploaded encrypted $file_name')

    # Remove the temporary encrypted file
    os.remove(encrypted_file_path)

    # Enforce the backup retention limit
    manage_backups(api, '$repo_id')
except Exception as e:
    print(f'Error uploading file: {str(e)}')
"
}
|
|
|
|
|
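# download_latest_backup
# Finds the newest lsky_backup_*.tar.gz.enc file in the dataset, downloads and decrypts it,
# and extracts the archive into /var/www/html.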
download_latest_backup() {
    local token="$HF_TOKEN"
    local repo_id="$DATASET_ID"
    local encryption_key="$ENCRYPTION_KEY"

    python3 -c "
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile
import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

def generate_key(password, salt=b'lsky_pro_salt'):
    # Derive the same Fernet key that was used when the backup was created.
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
    return key

def decrypt_file(encrypted_file_path, key):
    f = Fernet(key)
    with open(encrypted_file_path, 'rb') as file:
        encrypted_data = file.read()
    decrypted_data = f.decrypt(encrypted_data)
    decrypted_file_path = encrypted_file_path[:-4]  # strip the .enc suffix
    with open(decrypted_file_path, 'wb') as file:
        file.write(decrypted_data)
    return decrypted_file_path

api = HfApi(token='$token')
try:
    files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
    backup_files = [f for f in files if f.startswith('lsky_backup_') and f.endswith('.tar.gz.enc')]

    if not backup_files:
        print('No backup files found')
        sys.exit()

    # Filenames embed a sortable timestamp, so the lexicographically last one is the newest.
    latest_backup = sorted(backup_files)[-1]

    with tempfile.TemporaryDirectory() as temp_dir:
        # Download the encrypted backup file
        encrypted_filepath = api.hf_hub_download(
            repo_id='$repo_id',
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )

        if encrypted_filepath and os.path.exists(encrypted_filepath):
            # Derive the decryption key
            key = generate_key('$encryption_key')

            # Decrypt the file
            decrypted_filepath = decrypt_file(encrypted_filepath, key)

            # Extract the archive into the web root
            with tarfile.open(decrypted_filepath, 'r:gz') as tar:
                tar.extractall('/var/www/html')

            print(f'Successfully restored backup from {latest_backup}')

            # Clean up the temporary file
            os.remove(decrypted_filepath)

except Exception as e:
    print(f'Error downloading backup: {str(e)}')
"
}
|
|
|
|
|
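# Restore the most recent backup, if any, before starting the web server.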
echo "Checking for latest backup from HuggingFace..."
download_latest_backup
|
|
|
|
|
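# sync_data
# Periodically archives /var/www/html and uploads the encrypted archive to HuggingFace.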
sync_data() {
    while true; do
        echo "Starting sync process at $(date)"

        if [ -d /var/www/html ]; then
            timestamp=$(date +%Y%m%d_%H%M%S)
            backup_file="lsky_backup_${timestamp}.tar.gz"

            # Archive the entire web root.
            tar -czf "/tmp/${backup_file}" -C /var/www/html .

            echo "Uploading backup to HuggingFace..."
            upload_backup "/tmp/${backup_file}" "${backup_file}"

            rm -f "/tmp/${backup_file}"
        else
            echo "Application directory does not exist yet, waiting for next sync..."
        fi

        # Default to a 2-hour interval unless SYNC_INTERVAL is set.
        SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
        echo "Next sync in ${SYNC_INTERVAL} seconds..."
        sleep "$SYNC_INTERVAL"
    done
}
|
|
|
|
|
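# Run the periodic backup loop in the background.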
sync_data &
|
|
|
|
|
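# Replace the shell with Apache running in the foreground.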
exec apache2-foreground |