sugar404 committed on
Commit
9cfed18
·
verified ·
1 Parent(s): 606e6b8

Create sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +200 -0
sync_data.sh ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Startup preamble: decide whether backups are enabled, choose the encryption
# passphrase and activate the Python virtualenv used by the backup helpers.
#
# Environment:
#   HF_TOKEN        required for backups; read here only to test presence
#   DATASET_ID      required for backups; read here only to test presence
#   ENCRYPTION_KEY  optional passphrase; a built-in default is used if unset

# Without both HF_TOKEN and DATASET_ID there is nothing to back up to, so hand
# the process over to Apache right away. exec replaces this shell on success,
# and a non-interactive bash exits by itself if exec fails, so nothing placed
# after exec inside this branch would ever run.
if [[ -z "${HF_TOKEN:-}" || -z "${DATASET_ID:-}" ]]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec apache2-foreground
fi

# Passphrase from which the backup encryption key is derived.
# SECURITY: the fallback below is hard-coded in this script, so anyone who can
# read the script can decrypt backups made with it. It is kept unchanged only
# so that backups already encrypted with it remain restorable.
if [[ -z "${ENCRYPTION_KEY:-}" ]]; then
  echo "WARNING: ENCRYPTION_KEY is not set; using the built-in default key. Set ENCRYPTION_KEY to keep backups confidential." >&2
fi
ENCRYPTION_KEY=${ENCRYPTION_KEY:-"a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6A7B8C9D0E1F2G3H4I5J6K7L8M9N0O1P2Q3R4S5T6U7V8W9X0Y1Z2"}

# Activate the virtualenv so python3 resolves to the interpreter under /opt/venv.
source /opt/venv/bin/activate
15
+
16
#######################################
# Encrypt a backup archive and upload it to the Hugging Face dataset repo,
# then prune old backups in that repo.
# Globals:   HF_TOKEN, DATASET_ID, ENCRYPTION_KEY (read)
# Arguments: $1 - path of the archive on disk
#            $2 - file name to use in the repo ('.enc' is appended)
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   exit status of the embedded Python program: 0 on success,
#            non-zero if encryption, upload or pruning raised an error
#######################################
upload_backup() {
  local file_path="$1"
  local file_name="$2"

  # All values reach Python through the environment and the program text is a
  # quoted heredoc, so quotes or backslashes in a token, key or path can
  # neither break nor inject into the Python source.
  BACKUP_FILE_PATH="$file_path" \
  BACKUP_FILE_NAME="$file_name" \
  HF_TOKEN="$HF_TOKEN" \
  DATASET_ID="$DATASET_ID" \
  ENCRYPTION_KEY="$ENCRYPTION_KEY" \
  python3 - <<'PYTHON'
import base64
import os
import sys

from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from huggingface_hub import HfApi


def generate_key(password, salt=b'lsky_pro_salt'):
    """Derive a urlsafe-base64 Fernet key from the passphrase via PBKDF2-HMAC-SHA256."""
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    return base64.urlsafe_b64encode(kdf.derive(password.encode()))


def encrypt_file(file_path, key):
    """Write the Fernet-encrypted content of file_path to file_path + '.enc' and return that path."""
    with open(file_path, 'rb') as src:
        encrypted_data = Fernet(key).encrypt(src.read())
    encrypted_file_path = file_path + '.enc'
    with open(encrypted_file_path, 'wb') as dst:
        dst.write(encrypted_data)
    return encrypted_file_path


def manage_backups(api, repo_id, max_files=10):
    """Delete the oldest backups so that at most max_files remain in the repo."""
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    # The timestamp in the file name makes lexical order chronological.
    backup_files = sorted(
        f for f in files
        if f.startswith('lsky_backup_') and f.endswith('.tar.gz.enc')
    )
    # This runs after the upload, so only the surplus beyond max_files goes.
    surplus = max(0, len(backup_files) - max_files)
    for file_to_delete in backup_files[:surplus]:
        try:
            api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
            print(f'Deleted old backup: {file_to_delete}')
        except Exception as e:
            # Best effort: one failed deletion must not stop the others.
            print(f'Error deleting {file_to_delete}: {str(e)}', file=sys.stderr)


file_path = os.environ['BACKUP_FILE_PATH']
file_name = os.environ['BACKUP_FILE_NAME']
repo_id = os.environ['DATASET_ID']
encrypted_file_path = file_path + '.enc'

try:
    api = HfApi(token=os.environ['HF_TOKEN'])
    key = generate_key(os.environ['ENCRYPTION_KEY'])
    try:
        encrypt_file(file_path, key)
        api.upload_file(
            path_or_fileobj=encrypted_file_path,
            path_in_repo=file_name + '.enc',
            repo_id=repo_id,
            repo_type='dataset',
        )
    finally:
        # Remove the temporary ciphertext whether or not the upload worked.
        if os.path.exists(encrypted_file_path):
            os.remove(encrypted_file_path)
    print(f'Successfully uploaded encrypted {file_name}')

    manage_backups(api, repo_id)
except Exception as e:
    print(f'Error uploading file: {str(e)}', file=sys.stderr)
    sys.exit(1)
PYTHON
}
90
+
91
#######################################
# Download the newest encrypted backup from the Hugging Face dataset repo,
# decrypt it and unpack it into /var/www/html.
# Globals:   HF_TOKEN, DATASET_ID, ENCRYPTION_KEY (read)
# Arguments: none
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   exit status of the embedded Python program: 0 when a backup was
#            restored or none exists, non-zero if an error was raised
#######################################
download_latest_backup() {
  # All values reach Python through the environment and the program text is a
  # quoted heredoc, so quotes or backslashes in the token, repo id or key can
  # neither break nor inject into the Python source.
  HF_TOKEN="$HF_TOKEN" \
  DATASET_ID="$DATASET_ID" \
  ENCRYPTION_KEY="$ENCRYPTION_KEY" \
  python3 - <<'PYTHON'
import base64
import os
import sys
import tarfile
import tempfile

from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from huggingface_hub import HfApi


def generate_key(password, salt=b'lsky_pro_salt'):
    """Derive a urlsafe-base64 Fernet key from the passphrase via PBKDF2-HMAC-SHA256."""
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    return base64.urlsafe_b64encode(kdf.derive(password.encode()))


def decrypt_file(encrypted_file_path, key):
    """Decrypt encrypted_file_path next to itself and return the new path."""
    with open(encrypted_file_path, 'rb') as src:
        decrypted_data = Fernet(key).decrypt(src.read())
    decrypted_file_path = encrypted_file_path[:-4]  # drop the '.enc' suffix
    with open(decrypted_file_path, 'wb') as dst:
        dst.write(decrypted_data)
    return decrypted_file_path


repo_id = os.environ['DATASET_ID']

try:
    api = HfApi(token=os.environ['HF_TOKEN'])
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [
        f for f in files
        if f.startswith('lsky_backup_') and f.endswith('.tar.gz.enc')
    ]

    if not backup_files:
        print('No backup files found')
        # SystemExit is not an Exception subclass, so this leaves with status 0.
        sys.exit()

    # The timestamp in the file name makes the lexically largest name the newest.
    latest_backup = sorted(backup_files)[-1]

    # Both the ciphertext and the plaintext live in this directory and are
    # removed together with it, on success and on failure alike.
    with tempfile.TemporaryDirectory() as temp_dir:
        encrypted_filepath = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir,
        )

        if encrypted_filepath and os.path.exists(encrypted_filepath):
            key = generate_key(os.environ['ENCRYPTION_KEY'])
            decrypted_filepath = decrypt_file(encrypted_filepath, key)

            # NOTE(review): extractall trusts the member paths in the archive;
            # the archive is only as trustworthy as ENCRYPTION_KEY and write
            # access to the dataset repo.
            with tarfile.open(decrypted_filepath, 'r:gz') as tar:
                tar.extractall('/var/www/html')

            print(f'Successfully restored backup from {latest_backup}')
except Exception as e:
    print(f'Error downloading backup: {str(e)}', file=sys.stderr)
    sys.exit(1)
PYTHON
}
165
+
166
# On startup, try to restore the most recent backup before anything else runs.
printf '%s\n' "Checking for latest backup from HuggingFace..."
download_latest_backup
169
+
170
#######################################
# Endless backup loop: archive /var/www/html, pass the archive to
# upload_backup, then sleep until the next round.
# Globals:   SYNC_INTERVAL (read) - seconds between rounds, default 7200
# Arguments: none
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   never returns; intended to run as a background job
#######################################
sync_data() {
  local timestamp backup_file work_dir archive tar_status interval

  while true; do
    echo "Starting sync process at $(date)"

    if [[ -d /var/www/html ]]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="lsky_backup_${timestamp}.tar.gz"

      # Build the archive in a private temp directory instead of a
      # predictable path directly under /tmp.
      if work_dir=$(mktemp -d); then
        archive="${work_dir}/${backup_file}"

        tar_status=0
        tar -czf "${archive}" -C /var/www/html . || tar_status=$?

        # GNU tar exits with 1 when files changed while being read, which is
        # expected for a live site and still yields a usable archive. Status 2
        # or higher is a fatal error, so the archive must not be uploaded.
        if (( tar_status <= 1 )); then
          echo "Uploading backup to HuggingFace..."
          upload_backup "${archive}" "${backup_file}" \
            || echo "Backup upload reported a failure" >&2
        else
          echo "Failed to create backup archive (tar exit ${tar_status}), skipping upload" >&2
        fi

        rm -rf -- "${work_dir}"
      else
        echo "Could not create a temporary directory, skipping this sync" >&2
      fi
    else
      echo "Application directory does not exist yet, waiting for next sync..."
    fi

    interval=${SYNC_INTERVAL:-7200}
    echo "Next sync in ${interval} seconds..."
    sleep "${interval}"
  done
}
195
+
196
# Launch the periodic sync loop as a background job of this shell.
sync_data &

# Replace this shell process with apache2-foreground.
exec apache2-foreground