File size: 5,412 Bytes
3d6ba31
 
 
dc83bf2
 
3d6ba31
 
 
 
 
 
 
 
15c54ca
3d6ba31
15c54ca
3d6ba31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc83bf2
3d6ba31
dc83bf2
 
 
 
 
 
3d6ba31
 
dc83bf2
 
3d6ba31
 
 
 
 
 
 
 
 
dc83bf2
 
 
 
 
 
 
 
3d6ba31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
import ujson as json
import pygit2
import shutil
from pygit2.enums import MergeFavor

from phylogeny import compute_all_P, compute_sim_matrix
from plotting import get_color, UNKNOWN_COLOR, DEFAULT_COLOR
# ------------------------------------------------------------------------------------------------
#
#                                       Loading data
#
# ------------------------------------------------------------------------------------------------
def load_data(force_clone=False):
    """Load the model data and derive the inputs used for plotting.

    Args:
        force_clone: Forwarded to ``load_git``.

    Returns:
        ``None`` when ``load_git`` reports no data. Otherwise the tuple
        ``(data, model_names, families, sim_matrix, colors)``, where
        ``families`` has had the renames from ``family_table.json`` applied
        and ``colors`` maps each family (plus ``'?'``) to a color.
    """
    data, model_names, families = load_git(force_clone=force_clone)
    if data is None:
        return None

    # Rename families if needed: family_table.json maps a model name to the
    # family that replaces the one stored alongside the data.
    with open('family_table.json', 'r', encoding='utf-8') as f:
        rename_table = json.load(f)

    for i, model_name in enumerate(model_names):
        # Models absent from the table keep their original family.
        families[i] = rename_table.get(model_name, families[i])

    all_P = compute_all_P(data, model_names)
    sim_matrix = compute_sim_matrix(model_names, all_P)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # family -> color assignment does not depend on set iteration order.
    colors = {}
    idx = 0
    for family in dict.fromkeys(families):
        color = get_color(idx)
        idx += 1
        while color == UNKNOWN_COLOR:  # Avoid using the unknown color for a family
            color = get_color(idx)
            idx += 1
        colors[family] = color

    colors['?'] = UNKNOWN_COLOR  # Assign the unknown color to the unknown family

    return data, model_names, families, sim_matrix, colors

def load_git(force_clone = False):
    """Clone or update the local ``Data`` checkout, then read ``Data/math``.

    When ``Data`` is missing (or ``force_clone`` is true) the repository is
    cloned from GitHub, replacing any existing ``Data`` directory. Otherwise
    ``origin`` is fetched and the remote counterpart of the current branch is
    merged, favoring the remote side on conflicting hunks. If the merge
    raises, the checkout is discarded and cloned again.

    Args:
        force_clone: Delete the local checkout and clone it again.

    Returns:
        ``(data_array, model_names, families)``: three parallel lists built
        from every ``Data/math/<folder>/<name>.json`` file, holding the
        record's ``'alleles'`` value, the string ``'<folder>/<name>'`` and
        the record's ``'family'`` value respectively. ``(None, [], [])`` is
        returned when no record is found.

    Raises:
        KeyError: If ``GITHUB_USERNAME`` or ``GITHUB_TOKEN`` is not set in
            the environment, or a record lacks ``'family'``/``'alleles'``.
    """
    # Built once and shared by clone and fetch so both authenticate the same way.
    callbacks = GitHubRemoteCallbacks(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN'])

    if force_clone or not os.path.exists('Data'):
        # Remove the existing directory if it exists
        if os.path.exists('Data'):
            shutil.rmtree('Data')
        pygit2.clone_repository('https://github.com/PhyloLM/Data', './Data', bare=False, callbacks=callbacks)
    else:
        repo = pygit2.Repository('Data')
        remote = repo.remotes['origin']  # Use named reference instead of index
        fetch_results = remote.fetch(callbacks=callbacks)
        print(fetch_results)

        # Find the remote-tracking reference matching the current branch
        branch_name = repo.head.shorthand
        remote_ref_name = f'refs/remotes/origin/{branch_name}'
        remote_commit = repo.lookup_reference(remote_ref_name).target

        # Resolve conflicts if any; strategy: theirs
        try:
            repo.merge(remote_commit, favor=MergeFavor.THEIRS)
        except Exception as e:
            print(f"Merge error: {e}")
            # Redownload the repository if merge fails
            return load_git(force_clone=True)

    math_root = os.path.join('Data', 'math')
    if not os.path.isdir(math_root):
        # A checkout without a math folder simply has no data to load.
        return None, [], []

    data_array = []
    model_names = []
    families = []
    for foname in os.listdir(math_root):
        folder = os.path.join(math_root, foname)
        # Only sub-directories hold model records
        if not os.path.isdir(folder):
            continue
        for fname in os.listdir(folder):
            if not fname.endswith('.json'):
                continue
            with open(os.path.join(folder, fname), 'r', encoding='utf-8') as f:
                d = json.load(f)
            families.append(d['family'])
            model_names.append(foname + '/' + fname[:-5])  # strip the '.json' suffix
            data_array.append(d['alleles'])

    if not data_array:
        return None, [], []
    return data_array, model_names, families

# ------------------------------------------------------------------------------------------------
#
#                                       Git functions
#
# ------------------------------------------------------------------------------------------------

class GitHubRemoteCallbacks(pygit2.RemoteCallbacks):
    """Remote callbacks that answer credential requests with a username and token."""

    def __init__(self, username, token):
        """Store the username and token used to build the credentials."""
        super().__init__()
        self.username, self.token = username, token

    def credentials(self, url, username_from_url, allowed_types):
        """Return a ``pygit2.UserPass`` for the stored username and token.

        The ``url``, ``username_from_url`` and ``allowed_types`` arguments
        are not consulted.
        """
        user_pass = pygit2.UserPass(self.username, self.token)
        return user_pass
    
# ------------------------------------------------------------------------------------------------
#
#                                       Saving data
#
# ------------------------------------------------------------------------------------------------

def save_git(alleles,genes,model,family):
    """Write one model record into the ``Data`` checkout, commit it and push.

    The record ``{'family': family, 'alleles': alleles}`` is written to
    ``Data/math/<model>.json``, staged, committed on ``HEAD`` and pushed to
    ``origin``.

    Args:
        alleles: Value stored under the record's ``'alleles'`` key.
        genes: Unused; kept so existing callers keep working.
        model: Model name; determines the path of the JSON file and appears
            in the commit message.
        family: Value stored under the record's ``'family'`` key.

    Raises:
        KeyError: If ``GITHUB_USERNAME``, ``GITHUB_MAIL`` or ``GITHUB_TOKEN``
            is not set in the environment.
    """
    repo = pygit2.Repository('Data')
    remote = repo.remotes['origin']

    # Path relative to the repository root (what the index expects) and the
    # matching path on disk.
    # NOTE(review): `model` is interpolated into the path unchecked; confirm
    # callers never pass names containing '..'.
    data_path = f'math/{model}.json'
    path = os.path.join('Data', data_path)

    # Create the parent folder(s); a no-op when they already exist.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        json.dump({'family': family, 'alleles': alleles}, f)

    repo.index.add(data_path)
    repo.index.write()
    tree = repo.index.write_tree()

    # The same identity is used as both author and committer.
    signature = pygit2.Signature(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_MAIL'])
    repo.create_commit('HEAD', signature, signature, f'Add data for model {model}', tree, [repo.head.target])

    # Push the branch the commit was made on; with a detached HEAD there is
    # no branch name, so fall back to 'refs/heads/main'.
    push_ref = 'refs/heads/main' if repo.head_is_detached else repo.head.name
    remote.push([push_ref], callbacks=GitHubRemoteCallbacks(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN']))