File size: 5,412 Bytes
3d6ba31 dc83bf2 3d6ba31 15c54ca 3d6ba31 15c54ca 3d6ba31 dc83bf2 3d6ba31 dc83bf2 3d6ba31 dc83bf2 3d6ba31 dc83bf2 3d6ba31 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import os
import ujson as json
import pygit2
import shutil
from pygit2.enums import MergeFavor
from phylogeny import compute_all_P, compute_sim_matrix
from plotting import get_color, UNKNOWN_COLOR, DEFAULT_COLOR
# ------------------------------------------------------------------------------------------------
#
# Loading data
#
# ------------------------------------------------------------------------------------------------
def load_data(force_clone=False):
    """Load the dataset and derive the similarity matrix and family colors.

    Args:
        force_clone: Forwarded to ``load_git``.

    Returns:
        ``None`` when ``load_git`` reports no data. Otherwise the tuple
        ``(data, model_names, families, sim_matrix, colors)``, where
        ``colors`` maps every family (plus ``'?'``) to a color.
    """
    data, model_names, families = load_git(force_clone=force_clone)
    if data is None:
        return None

    # Rename families if needed: an entry of family_table.json keyed by the
    # model name overrides the family read from the data files.
    with open('family_table.json', 'r') as f:
        rename_table = json.load(f)
    for i, name in enumerate(model_names):
        try:
            families[i] = rename_table[name]
        except KeyError:
            pass  # No override for this model: keep the family as loaded.

    all_P = compute_all_P(data, model_names)
    sim_matrix = compute_sim_matrix(model_names, all_P)

    # De-duplicate in first-seen order. Iterating a set here would hand out
    # colors in an order that can differ from one run to the next.
    colors = {}
    idx = 0
    for family in dict.fromkeys(families):
        color = get_color(idx)
        idx += 1
        while color == UNKNOWN_COLOR:  # Avoid using the unknown color for a family
            color = get_color(idx)
            idx += 1
        colors[family] = color
    colors['?'] = UNKNOWN_COLOR  # Assign the unknown color to the unknown family
    return data, model_names, families, sim_matrix, colors
def load_git(force_clone = False):
    """Bring the local ``Data`` checkout up to date and read its JSON records.

    The repository is cloned when ``Data`` does not exist or ``force_clone``
    is true (an existing ``Data`` directory is deleted first). Otherwise the
    ``origin`` remote is fetched and the remote counterpart of the current
    branch is merged; if the merge raises, the function starts over with a
    fresh clone.

    Args:
        force_clone: When true, discard any existing checkout and clone again.

    Returns:
        ``(data_array, model_names, families)`` built from every
        ``Data/math/<folder>/<name>.json`` file, with model names of the form
        ``'<folder>/<name>'``. Returns ``(None, [], [])`` when no record is
        found or ``Data/math`` does not exist.

    Raises:
        KeyError: If ``GITHUB_USERNAME`` or ``GITHUB_TOKEN`` is not set, or a
            record lacks the ``'family'`` or ``'alleles'`` key.
    """
    # Built once and shared by clone and fetch, so both authenticate alike.
    callbacks = GitHubRemoteCallbacks(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN'])

    if force_clone or not os.path.exists('Data'):
        # Remove the existing directory if it exists
        if os.path.exists('Data'):
            shutil.rmtree('Data')
        pygit2.clone_repository('https://github.com/PhyloLM/Data', './Data', bare=False, callbacks=callbacks)
    else:
        repo = pygit2.Repository('Data')
        remote = repo.remotes['origin']  # Use named reference instead of index
        fetch_results = remote.fetch(callbacks=callbacks)
        print(fetch_results)
        # Get the current branch name
        branch_name = repo.head.shorthand
        # Find the reference to the remote branch
        remote_ref_name = f'refs/remotes/origin/{branch_name}'
        remote_commit = repo.lookup_reference(remote_ref_name).target
        # Merge the changes into the current branch.
        # NOTE(review): the original comment asked for a "theirs" conflict
        # strategy, but no merge favor is passed here and no merge commit is
        # created afterwards - confirm this is the intended behaviour.
        try:
            repo.merge(remote_commit)
        except Exception as e:
            # Deliberately broad: any merge failure falls back to a clean clone.
            print(f"Merge error: {e}")
            # Redownload the repository if merge fails
            return load_git(force_clone=True)

    data_array = []
    model_names = []
    families = []
    math_dir = os.path.join('Data', 'math')
    if not os.path.isdir(math_dir):
        # Nothing to read yet: report "no data" instead of crashing in listdir.
        return None, [], []

    # listdir order is arbitrary; sort so the record order is reproducible.
    for foname in sorted(os.listdir(math_dir)):
        folder = os.path.join(math_dir, foname)
        # check if it is a directory
        if not os.path.isdir(folder):
            continue
        for fname in sorted(os.listdir(folder)):
            if not fname.endswith('.json'):
                continue
            with open(os.path.join(folder, fname), 'r') as f:
                d = json.load(f)
            families.append(d['family'])
            model_names.append(foname + '/' + fname[:-5])  # strip the '.json' suffix
            data_array.append(d['alleles'])

    if not data_array:
        return None, [], []
    return data_array, model_names, families
# ------------------------------------------------------------------------------------------------
#
# Git functions
#
# ------------------------------------------------------------------------------------------------
class GitHubRemoteCallbacks(pygit2.RemoteCallbacks):
    """Remote callbacks that answer credential requests with a fixed username/token pair."""

    def __init__(self, username, token):
        """Remember the username and token that ``credentials`` hands back."""
        self.username, self.token = username, token
        super().__init__()

    def credentials(self, url, username_from_url, allowed_types):
        """Return a ``pygit2.UserPass`` built from the stored pair.

        The ``url``, ``username_from_url`` and ``allowed_types`` arguments
        are not consulted.
        """
        user_pass = pygit2.UserPass(self.username, self.token)
        return user_pass
# ------------------------------------------------------------------------------------------------
#
# Saving data
#
# ------------------------------------------------------------------------------------------------
def save_git(alleles,genes,model,family):
    """Write one model record into the ``Data`` checkout, commit it and push.

    The record ``{'family': family, 'alleles': alleles}`` is dumped to
    ``Data/math/<model>.json``, staged, committed on ``HEAD`` with the
    current head as its only parent, and ``refs/heads/main`` is pushed to
    ``origin``.

    Args:
        alleles: Value stored under the ``'alleles'`` key.
        genes: Not used by this function.
        model: Model name; determines the file path and the commit message.
        family: Value stored under the ``'family'`` key.

    Raises:
        KeyError: If ``GITHUB_USERNAME``, ``GITHUB_MAIL`` or ``GITHUB_TOKEN``
            is not set.
    """
    repository = pygit2.Repository('Data')
    origin = repository.remotes['origin']

    # Path relative to the repository root (used for staging) and its
    # location on disk inside the local checkout.
    relative_path = f'math/{model}.json'
    target_file = os.path.join('Data', relative_path)

    # create the file folder path
    target_dir = os.path.dirname(target_file)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)

    # Write the record
    with open(target_file, 'w') as handle:
        json.dump({'family': family, 'alleles': alleles}, handle)

    # Stage the file and snapshot the index as a tree
    index = repository.index
    index.add(relative_path)
    index.write()
    tree_id = index.write_tree()

    user = os.environ['GITHUB_USERNAME']
    mail = os.environ['GITHUB_MAIL']
    author = pygit2.Signature(user, mail)
    committer = pygit2.Signature(user, mail)
    repository.create_commit(
        'HEAD',
        author,
        committer,
        f'Add data for model {model}',
        tree_id,
        [repository.head.target],
    )

    # The token is looked up only now, after the commit has been created.
    push_callbacks = GitHubRemoteCallbacks(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN'])
    origin.push(['refs/heads/main'], callbacks=push_callbacks)