sugar404 committed on
Commit
b3038f8
·
verified ·
1 Parent(s): 7b04621

Create sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +104 -0
sync_data.sh ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# sync_data.sh - start main.py and, when credentials are available, keep
# /app/data backed up to a Hugging Face dataset repository.
#
# Environment:
#   HF_TOKEN       Hugging Face access token (required for backups)
#   DATASET_ID     dataset repository that stores the backups (required for
#                  backups)
#   SYNC_INTERVAL  seconds between backups (optional, default 7200)

# Check the environment. Without both values there is nowhere to back up to,
# so hand over to the application directly; exec replaces this shell, which
# means nothing below this block runs in that case.
if [[ -z "${HF_TOKEN:-}" || -z "${DATASET_ID:-}" ]]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec python main.py
fi

# Authenticate huggingface_hub through the environment instead of an
# interactive login.
export HUGGING_FACE_HUB_TOKEN="$HF_TOKEN"

# Sync function
#######################################
# Periodically archive /app/data and upload it to the dataset repository.
# Runs forever; intended to be started as a background job.
#
# Each round:
#   1. packs /app/data into /tmp/backup_<YYYYmmdd_HHMMSS>.tar.gz
#   2. uploads the archive and prunes the oldest backups so that at most
#      50 remain in the repository
#   3. removes the local archive and sleeps SYNC_INTERVAL seconds
#
# Globals:
#   DATASET_ID     (read) target dataset repository
#   SYNC_INTERVAL  (read) seconds between rounds, default 7200
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout, archive failures on stderr.
# Returns:
#   Never returns.
#######################################
sync_data() {
  local timestamp backup_file archive tar_status interval

  while true; do
    echo "Starting sync process at $(date)"

    # Create the temporary archive.
    timestamp=$(date +%Y%m%d_%H%M%S)
    backup_file="backup_${timestamp}.tar.gz"
    archive="/tmp/${backup_file}"

    # -C avoids changing this shell's working directory. Status 1 is kept as
    # non-fatal (GNU tar uses it for files that changed while being read,
    # which can happen for a log that is still being written); anything
    # higher means the archive cannot be trusted, so the upload is skipped.
    tar_status=0
    tar -czf "$archive" -C /app data/ || tar_status=$?

    if ((tar_status > 1)); then
      echo "Backup failed: tar exited with status ${tar_status}" >&2
    else
      # Values reach Python through the environment, never by pasting them
      # into the program text, so quotes in them cannot alter the code.
      DATASET_ID="$DATASET_ID" BACKUP_PATH="$archive" BACKUP_NAME="$backup_file" \
        python3 - <<'PY'
import os

from huggingface_hub import HfApi

repo_id = os.environ['DATASET_ID']
backup_path = os.environ['BACKUP_PATH']
backup_name = os.environ['BACKUP_NAME']


def manage_backups(api, repo_id, max_files=50):
    """Delete the oldest backups so that at most max_files remain."""
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    # Names embed a YYYYmmdd_HHMMSS timestamp, so lexical order is age order.
    backup_files = sorted(
        f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')
    )

    # Called after the upload, so only the surplus beyond max_files goes.
    surplus = max(len(backup_files) - max_files, 0)
    for file_to_delete in backup_files[:surplus]:
        try:
            api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
            print(f'Deleted old backup: {file_to_delete}')
        except Exception as e:
            print(f'Error deleting {file_to_delete}: {str(e)}')


try:
    api = HfApi()
    api.upload_file(
        path_or_fileobj=backup_path,
        path_in_repo=backup_name,
        repo_id=repo_id,
        repo_type='dataset'
    )
    print('Backup uploaded successfully')

    manage_backups(api, repo_id)
except Exception as e:
    print(f'Backup failed: {str(e)}')
PY
    fi

    # Clean up the temporary archive.
    rm -f -- "$archive"

    # Wait for the next round.
    interval="${SYNC_INTERVAL:-7200}"
    echo "Next sync in ${interval} seconds..."
    sleep "$interval"
  done
}

# Restore function
#######################################
# Download the newest backup_*.tar.gz from the dataset repository and unpack
# it into /app.
#
# Globals:
#   DATASET_ID  (read) dataset repository holding the backups
# Arguments:
#   None
# Outputs:
#   Progress and result messages on stdout.
# Returns:
#   0 when a backup was restored or none exists, non-zero when the restore
#   failed.
#######################################
restore_latest() {
  echo "Attempting to restore latest backup..."
  # The repository id is handed over through the environment so that it is
  # never pasted into the Python program text.
  DATASET_ID="$DATASET_ID" python3 - <<'PY'
import os
import subprocess
import sys

try:
    from huggingface_hub import HfApi

    repo_id = os.environ['DATASET_ID']
    api = HfApi()
    files = api.list_repo_files(repo_id, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')]

    if backup_files:
        # Names embed a YYYYmmdd_HHMMSS timestamp, so the greatest is newest.
        latest = max(backup_files)
        archive = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest,
            repo_type='dataset',
            local_dir='/tmp'
        )
        try:
            # Argument list instead of a shell string: the file name is never
            # parsed by a shell, and a failed extraction raises instead of
            # being reported as a successful restore.
            subprocess.run(['tar', '-xzf', archive, '-C', '/app'], check=True)
        finally:
            os.remove(archive)
        print(f'Restored from {latest}')
    else:
        print('No backup found')
except Exception as e:
    print(f'Restore failed: {str(e)}')
    sys.exit(1)
PY
}

# Main program
#
# Order matters here:
#   1. Restore first, before the persistent log is opened. Backups contain
#      data/ and the log lives in /app/data, so extraction may replace the log
#      file; a tee that already had it open would keep writing to the old
#      file. The restore output is captured and appended afterwards.
#   2. Attach the log with a process substitution rather than a pipeline.
#      With `( ...; sync_data & exec app ) | tee`, the background sync loop
#      keeps the pipe open, so the script would never finish after the app
#      exits, and the pipeline would report tee's status instead of the app's.
#   3. exec the application so its exit status becomes this script's status.
log_file=/app/data/backup.log

# Try to restore the latest backup.
restore_log=$(mktemp) || restore_log="/tmp/restore.$$.log"
restore_latest 2>&1 | tee -- "$restore_log"

# The data directory may not exist yet when nothing was restored.
mkdir -p -- "${log_file%/*}"
cat -- "$restore_log" >>"$log_file"
rm -f -- "$restore_log"

# From here on, copy everything written to stdout/stderr into the log.
exec > >(tee -a -- "$log_file") 2>&1

# Start the sync loop in the background.
sync_data &

# Start the main application.
exec python main.py