File size: 4,834 Bytes
3d6ba31 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import os
import ujson as json
import pygit2
from phylogeny import compute_all_P, compute_sim_matrix
from plotting import get_color, UNKNOWN_COLOR, DEFAULT_COLOR
# ------------------------------------------------------------------------------------------------
#
# Loading data
#
# ------------------------------------------------------------------------------------------------
def load_data():
    """Load the model data and derive what is needed to plot it.

    Reads the dataset through ``load_git``, applies the family overrides
    found in ``family_table.json`` (expected to be a JSON object mapping a
    model name to a family name), calls ``compute_all_P`` and
    ``compute_sim_matrix`` on the result, and assigns one color per family.

    Returns:
        ``None`` when ``load_git`` reports no data.  Otherwise the tuple
        ``(data, model_names, families, sim_matrix, colors)`` where
        ``colors`` maps every family name to a color and ``'?'`` to
        ``UNKNOWN_COLOR``.
    """
    data, model_names, families = load_git()
    if data is None:
        return None

    # Rename families if needed: an entry in the table overrides the family
    # stored alongside the model, models without an entry keep theirs.
    with open('family_table.json', 'r') as f:
        rename_table = json.load(f)
    families = [
        rename_table.get(name, family)
        for name, family in zip(model_names, families)
    ]

    all_P = compute_all_P(data, model_names)
    sim_matrix = compute_sim_matrix(model_names, all_P)

    colors = {}
    idx = 0
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # family -> color assignment does not depend on set iteration order.
    for family in dict.fromkeys(families):
        color = get_color(idx)
        idx += 1
        while color == UNKNOWN_COLOR:  # Avoid using the unknown color for a family
            color = get_color(idx)
            idx += 1
        colors[family] = color
    colors['?'] = UNKNOWN_COLOR  # Assign the unknown color to the unknown family
    return data, model_names, families, sim_matrix, colors
def load_git():
    """Bring the local ``Data`` checkout up to date and read every model file.

    When ``Data`` already exists, ``origin`` is fetched with the GitHub
    credentials and the current branch is fast-forwarded to its remote
    counterpart when that is possible.  Otherwise the repository is cloned
    into ``./Data``.  Every ``Data/math/<folder>/<name>.json`` file is then
    loaded.

    Returns:
        ``(data_array, model_names, families)``: three parallel lists holding,
        for each file, its ``'alleles'`` entry, the string
        ``'<folder>/<name>'`` and its ``'family'`` entry.
        ``(None, [], [])`` when no model file is found.

    Raises:
        KeyError: if ``GITHUB_USERNAME`` or ``GITHUB_TOKEN`` is missing from
            the environment, or if the remote branch reference does not exist.
    """
    callbacks = GitHubRemoteCallbacks(
        os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN']
    )
    if os.path.exists('Data'):
        repo = pygit2.Repository('Data')
        remote = repo.remotes['origin']  # Use named reference instead of index
        # Authenticate the fetch the same way the clone and the push are.
        remote.fetch(callbacks=callbacks)
        # Find the remote counterpart of the current branch.
        branch_name = repo.head.shorthand
        remote_ref = repo.lookup_reference(f'refs/remotes/origin/{branch_name}')
        remote_id = remote_ref.resolve().target
        # Only fast-forward: an up-to-date checkout needs nothing, and a
        # diverged one is left untouched rather than merged automatically.
        analysis, _ = repo.merge_analysis(remote_id)
        if analysis & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD:
            repo.checkout_tree(repo.get(remote_id))
            repo.head.set_target(remote_id)
    else:
        pygit2.clone_repository(
            'https://github.com/PhyloLM/Data',
            './Data',
            bare=False,
            callbacks=callbacks,
        )

    math_dir = os.path.join('Data', 'math')
    # A checkout without a math folder simply holds no data yet.
    if not os.path.isdir(math_dir):
        return None, [], []

    data_array = []
    model_names = []
    families = []
    # Sorted listings keep the model order stable from one run to the next.
    for foname in sorted(os.listdir(math_dir)):
        folder = os.path.join(math_dir, foname)
        # Only sub-directories hold model files.
        if not os.path.isdir(folder):
            continue
        for fname in sorted(os.listdir(folder)):
            if not fname.endswith('.json'):
                continue
            with open(os.path.join(folder, fname), 'r') as f:
                d = json.load(f)
            families.append(d['family'])
            model_names.append(foname + '/' + fname[:-5])  # drop '.json'
            data_array.append(d['alleles'])

    if not data_array:
        return None, [], []
    return data_array, model_names, families
# ------------------------------------------------------------------------------------------------
#
# Git functions
#
# ------------------------------------------------------------------------------------------------
class GitHubRemoteCallbacks(pygit2.RemoteCallbacks):
    """Remote callbacks that answer credential requests with a username/token pair."""

    def __init__(self, username, token):
        """Store the username and token that ``credentials`` hands out."""
        self.username, self.token = username, token
        super().__init__()

    def credentials(self, url, username_from_url, allowed_types):
        """Return a ``pygit2.UserPass`` built from the stored username and token.

        The ``url``, ``username_from_url`` and ``allowed_types`` arguments
        are not consulted.
        """
        user_pass = pygit2.UserPass(self.username, self.token)
        return user_pass
# ------------------------------------------------------------------------------------------------
#
# Saving data
#
# ------------------------------------------------------------------------------------------------
def save_git(alleles, genes, model, family):
    """Write one model's data file into ``Data``, commit it and push it.

    Args:
        alleles: value stored under the ``'alleles'`` key of the JSON file.
        genes: not used by this function; kept so existing callers still work.
        model: model name; the file is written to ``Data/math/<model>.json``.
        family: value stored under the ``'family'`` key of the JSON file.

    Raises:
        KeyError: if ``GITHUB_USERNAME``, ``GITHUB_MAIL`` or ``GITHUB_TOKEN``
            is missing from the environment.
    """
    # Read the whole configuration first so that a missing variable fails
    # before a file is written or a local commit is created.
    username = os.environ['GITHUB_USERNAME']
    mail = os.environ['GITHUB_MAIL']
    token = os.environ['GITHUB_TOKEN']

    repo = pygit2.Repository('Data')
    remote = repo.remotes['origin']

    data_path = f'math/{model}.json'  # path relative to the repository root
    path = os.path.join('Data', data_path)
    # Create the file folder path
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        json.dump({'family': family, 'alleles': alleles}, f)

    repo.index.add(data_path)
    repo.index.write()
    tree = repo.index.write_tree()

    # The same identity is used as author and committer.
    signature = pygit2.Signature(username, mail)
    repo.create_commit(
        'HEAD',
        signature,
        signature,
        f'Add data for model {model}',
        tree,
        [repo.head.target],
    )
    # Push the branch that just received the commit instead of assuming it
    # is called "main".
    remote.push(
        [repo.head.name],
        callbacks=GitHubRemoteCallbacks(username, token),
    )