#!/bin/bash
# Entrypoint: restores the newest encrypted backup from a HuggingFace dataset,
# runs a periodic backup loop in the background, then starts Apache.
# Required env: HF_TOKEN, DATASET_ID. Optional: ENCRYPTION_KEY, SYNC_INTERVAL.

# Without credentials there is nothing to back up — just run the web server.
if [[ -z "${HF_TOKEN:-}" ]] || [[ -z "${DATASET_ID:-}" ]]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec apache2-foreground
  exit 1  # reached only if exec itself failed; report failure, not success
fi

# Passphrase used to derive the backup encryption key.
# SECURITY: this default is baked into the image and therefore public —
# always override ENCRYPTION_KEY in production deployments.
ENCRYPTION_KEY=${ENCRYPTION_KEY:-"a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6A7B8C9D0E1F2G3H4I5J6K7L8M9N0O1P2Q3R4S5T6U7V8W9X0Y1Z2"}

# Activate the virtualenv providing huggingface_hub and cryptography.
source /opt/venv/bin/activate
# upload_backup FILE_PATH FILE_NAME
# Encrypts FILE_PATH with a key derived from ENCRYPTION_KEY and uploads it to
# the HuggingFace dataset DATASET_ID as FILE_NAME.enc, then prunes old backups.
# Values are handed to Python via the environment (quoted heredoc) instead of
# interpolating them into the Python source: a quote in a path, token or key
# can no longer break the program or inject code.
upload_backup() {
  local file_path="$1"
  local file_name="$2"

  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" ENCRYPTION_KEY="$ENCRYPTION_KEY" \
  BACKUP_FILE_PATH="$file_path" BACKUP_FILE_NAME="$file_name" \
  python3 - <<'PYEOF'
import base64
import os

from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from huggingface_hub import HfApi


def generate_key(password, salt=b'lsky_pro_salt'):
    """Derive a Fernet key from the passphrase (PBKDF2-HMAC-SHA256, 100k iterations)."""
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    return base64.urlsafe_b64encode(kdf.derive(password.encode()))


def encrypt_file(file_path, key):
    """Encrypt file_path in full; write file_path + '.enc' and return that path."""
    f = Fernet(key)
    with open(file_path, 'rb') as fh:
        file_data = fh.read()
    encrypted_file_path = file_path + '.enc'
    with open(encrypted_file_path, 'wb') as fh:
        fh.write(f.encrypt(file_data))
    return encrypted_file_path


def manage_backups(api, repo_id, max_files=10):
    """Keep at most max_files backups in the dataset, deleting the oldest first."""
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = sorted(
        f for f in files
        if f.startswith('lsky_backup_') and f.endswith('.tar.gz.enc')
    )
    if len(backup_files) >= max_files:
        # Timestamped names sort chronologically, so the head of the list is oldest.
        for file_to_delete in backup_files[:len(backup_files) - max_files + 1]:
            try:
                api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                print(f'Error deleting {file_to_delete}: {str(e)}')


file_path = os.environ['BACKUP_FILE_PATH']
file_name = os.environ['BACKUP_FILE_NAME']
repo_id = os.environ['DATASET_ID']
api = HfApi(token=os.environ['HF_TOKEN'])
try:
    # Derive the encryption key and encrypt the archive before upload.
    key = generate_key(os.environ['ENCRYPTION_KEY'])
    encrypted_file_path = encrypt_file(file_path, key)
    api.upload_file(
        path_or_fileobj=encrypted_file_path,
        path_in_repo=file_name + '.enc',
        repo_id=repo_id,
        repo_type='dataset'
    )
    print(f'Successfully uploaded encrypted {file_name}')
    # Remove the temporary encrypted copy, then enforce the retention limit.
    os.remove(encrypted_file_path)
    manage_backups(api, repo_id)
except Exception as e:
    print(f'Error uploading file: {str(e)}')
PYEOF
}
# download_latest_backup
# Fetches the newest lsky_backup_*.tar.gz.enc from the HuggingFace dataset,
# decrypts it with ENCRYPTION_KEY and unpacks it into /var/www/html.
# Credentials and keys are passed via the environment (quoted heredoc) rather
# than interpolated into the Python source, so special characters cannot break
# the program or inject code.
download_latest_backup() {
  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" ENCRYPTION_KEY="$ENCRYPTION_KEY" \
  python3 - <<'PYEOF'
import base64
import os
import sys
import tarfile
import tempfile

from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from huggingface_hub import HfApi


def generate_key(password, salt=b'lsky_pro_salt'):
    """Derive a Fernet key from the passphrase (PBKDF2-HMAC-SHA256, 100k iterations)."""
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    return base64.urlsafe_b64encode(kdf.derive(password.encode()))


def decrypt_file(encrypted_file_path, key):
    """Decrypt encrypted_file_path; write the plaintext alongside it minus '.enc'."""
    f = Fernet(key)
    with open(encrypted_file_path, 'rb') as fh:
        encrypted_data = fh.read()
    decrypted_data = f.decrypt(encrypted_data)
    decrypted_file_path = encrypted_file_path[:-4]  # strip the '.enc' suffix
    with open(decrypted_file_path, 'wb') as fh:
        fh.write(decrypted_data)
    return decrypted_file_path


repo_id = os.environ['DATASET_ID']
api = HfApi(token=os.environ['HF_TOKEN'])
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('lsky_backup_') and f.endswith('.tar.gz.enc')]
    if not backup_files:
        print('No backup files found')
        sys.exit()
    latest_backup = sorted(backup_files)[-1]  # timestamped names sort chronologically
    with tempfile.TemporaryDirectory() as temp_dir:
        # Download the encrypted backup into a throwaway directory.
        encrypted_filepath = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )
        if encrypted_filepath and os.path.exists(encrypted_filepath):
            key = generate_key(os.environ['ENCRYPTION_KEY'])
            decrypted_filepath = decrypt_file(encrypted_filepath, key)
            # NOTE(review): extractall trusts member paths inside the archive.
            # Backups are self-produced here, but a hostile archive could write
            # outside /var/www/html — consider tarfile's extraction filters.
            with tarfile.open(decrypted_filepath, 'r:gz') as tar:
                tar.extractall('/var/www/html')
            print(f'Successfully restored backup from {latest_backup}')
            os.remove(decrypted_filepath)
except Exception as e:
    print(f'Error downloading backup: {str(e)}')
PYEOF
}
# On first start, pull the newest backup so existing data is restored
# before Apache begins serving.
printf '%s\n' "Checking for latest backup from HuggingFace..."
download_latest_backup
# sync_data
# Endless loop: every SYNC_INTERVAL seconds (default 7200) archive
# /var/www/html and upload it as an encrypted backup. Intended to be
# launched in the background (`sync_data &`).
sync_data() {
  local timestamp backup_file sync_interval
  while true; do
    echo "Starting sync process at $(date)"
    if [ -d /var/www/html ]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="lsky_backup_${timestamp}.tar.gz"
      # Only upload when the archive was created successfully — a partial
      # tarball would poison the restore path on the next container start.
      if tar -czf "/tmp/${backup_file}" -C /var/www/html .; then
        echo "Uploading backup to HuggingFace..."
        upload_backup "/tmp/${backup_file}" "${backup_file}"
      else
        echo "Backup archive creation failed, skipping upload" >&2
      fi
      rm -f "/tmp/${backup_file}"
    else
      echo "Application directory does not exist yet, waiting for next sync..."
    fi
    # Re-read each cycle so the interval can be changed without restart logic.
    sync_interval=${SYNC_INTERVAL:-7200}
    echo "Next sync in ${sync_interval} seconds..."
    sleep "$sync_interval"
  done
}
# Start the periodic backup loop in the background.
sync_data &
# Replace the shell with Apache so it runs as PID 1 and receives signals
# directly. (Removed a stray trailing '|' that made this a broken pipeline.)
exec apache2-foreground