Fixed git push issue
Browse files
- app.py +56 -14
- loading.py +19 -7
app.py
CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
|
|
2 |
import os
|
3 |
import numpy as np
|
4 |
import ujson as json
|
|
|
|
|
5 |
|
6 |
from loading import load_data, save_git
|
7 |
from tools import compute_ordered_matrix
|
@@ -101,8 +103,8 @@ def search_bar_gr(model_names,slider=True,double_search=False,key=None):
|
|
101 |
ret.insert(0,col2)
|
102 |
return ret
|
103 |
|
104 |
-
import spaces
|
105 |
-
|
106 |
def _run(path,genes,N,progress_bar):
|
107 |
#Load the model
|
108 |
progress_bar(0.20, desc="Loading Model...",total=100)
|
@@ -110,28 +112,35 @@ def _run(path,genes,N,progress_bar):
|
|
110 |
model,tokenizer = load_model(path)
|
111 |
except ValueError as e:
|
112 |
print(f"Error loading model '{path}': {e}")
|
113 |
-
gr.Warning("Model couldn't load. This space currently only works with AutoModelForCausalLM models and
|
114 |
return None
|
115 |
except OSError as e:
|
116 |
print(f"Error loading model '{path}': {e}")
|
117 |
-
gr.Warning("Model doesn't seem to exist on the HuggingFace Hub or might be gated. Please check the model name and
|
118 |
return None
|
119 |
except RuntimeError as e:
|
120 |
if 'out of memory' in str(e):
|
121 |
print(f"Error loading model '{path}': {e}")
|
122 |
-
gr.Warning("Loading the model triggered an out of memory error. It may be too big for the GPU (80Go RAM). Please
|
123 |
return None
|
124 |
else:
|
125 |
print(f"Error loading model '{path}': {e}")
|
126 |
-
gr.Warning("Model couldn't be loaded.
|
127 |
return None
|
128 |
except Exception as e:
|
129 |
print(f"Error loading model '{path}': {e}")
|
130 |
-
gr.Warning("Model couldn't be loaded.
|
131 |
return None
|
132 |
progress_bar(0.25, desc="Generating data...",total=100)
|
|
|
133 |
for i,output in enumerate(llm_run(model,tokenizer,genes,N)):
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
return output
|
136 |
|
137 |
def run(path,progress_bar):
|
@@ -171,7 +180,37 @@ def run(path,progress_bar):
|
|
171 |
progress_bar(0.95, desc="Saving data ...",total=100)
|
172 |
|
173 |
alleles = [[compl[j]['generated_text'][len(gene):][:4] for j in range(len(compl))] for gene,compl in zip(genes,output)]
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
progress_bar(1, desc="Done!",total=100)
|
177 |
|
@@ -222,6 +261,8 @@ def reload_env():
|
|
222 |
|
223 |
|
224 |
# Load environment variables
|
|
|
|
|
225 |
|
226 |
USERNAME = os.environ['GITHUB_USERNAME']
|
227 |
TOKEN = os.environ['GITHUB_TOKEN']
|
@@ -269,7 +310,7 @@ with gr.Blocks(title="PhyloLM", theme=gr.themes.Default()) as demo:
|
|
269 |
"- A similarity matrix (values range from 0 = dissimilar to 1 = highly similar). \n"
|
270 |
"- 2D and 3D scatter plots representing how close or far from each other LLMs are (plotted using UMAP). \n"
|
271 |
"- A tree to visualize distances between models (distance from leaf A to leaf B in the tree is similar to the distance between the two models)\n\n"
|
272 |
-
"Models are colored according to their family (e.g., LLaMA, OPT, Mistral) for the ones that were in the original paper.
|
273 |
)
|
274 |
|
275 |
# Load models for the dropdown
|
@@ -322,7 +363,7 @@ with gr.Blocks(title="PhyloLM", theme=gr.themes.Default()) as demo:
|
|
322 |
|
323 |
|
324 |
# Submit model section
|
325 |
-
gr.Markdown("##
|
326 |
|
327 |
gr.Markdown(
|
328 |
"You may contribute new models to this collaborative space using compute resources. "
|
@@ -341,8 +382,8 @@ with gr.Blocks(title="PhyloLM", theme=gr.themes.Default()) as demo:
|
|
341 |
)
|
342 |
|
343 |
with gr.Group():
|
344 |
-
model_input = gr.Textbox(label="Model", interactive=
|
345 |
-
submit_btn = gr.Button("Run PhyloLM", variant="primary",interactive=
|
346 |
|
347 |
|
348 |
# Disclaimer and citation
|
@@ -386,7 +427,8 @@ url={https://openreview.net/forum?id=rTQNGQxm4K}
|
|
386 |
tree_alpha_marker.change(fn=lambda x : slider_changeAlphaMarkers(x,'fig4'), inputs=tree_alpha_marker, outputs=FIGS_OBJECTS)
|
387 |
|
388 |
# Run PhyloLM button
|
389 |
-
submit_btn.click(fn=prepare_run, inputs=[model_input], outputs=[model_input]).then(fn=reload_env, inputs=[], outputs=FIGS_OBJECTS+ [sim_mat_search_x, sim_mat_search_y, viz_search, tree_search])
|
|
|
390 |
|
391 |
#Set more globals
|
392 |
SIM_MAT_SEARCH_X = sim_mat_search_x
|
|
|
2 |
import os
|
3 |
import numpy as np
|
4 |
import ujson as json
|
5 |
+
import time
|
6 |
+
from threading import Thread
|
7 |
|
8 |
from loading import load_data, save_git
|
9 |
from tools import compute_ordered_matrix
|
|
|
103 |
ret.insert(0,col2)
|
104 |
return ret
|
105 |
|
106 |
+
#import spaces
|
107 |
+
#@spaces.GPU(duration=300)
|
108 |
def _run(path,genes,N,progress_bar):
|
109 |
#Load the model
|
110 |
progress_bar(0.20, desc="Loading Model...",total=100)
|
|
|
112 |
model,tokenizer = load_model(path)
|
113 |
except ValueError as e:
|
114 |
print(f"Error loading model '{path}': {e}")
|
115 |
+
gr.Warning("Model couldn't load. This space currently only works with AutoModelForCausalLM models and for security reasons cannot execute remote code. Please check the model architecture and whether it too recent and requires the execution of custom code.")
|
116 |
return None
|
117 |
except OSError as e:
|
118 |
print(f"Error loading model '{path}': {e}")
|
119 |
+
gr.Warning("Model doesn't seem to exist on the HuggingFace Hub or might be gated. Please check the model name and its accessibility.")
|
120 |
return None
|
121 |
except RuntimeError as e:
|
122 |
if 'out of memory' in str(e):
|
123 |
print(f"Error loading model '{path}': {e}")
|
124 |
+
gr.Warning("Loading the model triggered an out of memory error. It may be too big for the GPU (80Go RAM max). Please verify the size of the model.")
|
125 |
return None
|
126 |
else:
|
127 |
print(f"Error loading model '{path}': {e}")
|
128 |
+
gr.Warning("Model couldn't be loaded. Check the logs for what happened or report an issue including the model's name.")
|
129 |
return None
|
130 |
except Exception as e:
|
131 |
print(f"Error loading model '{path}': {e}")
|
132 |
+
gr.Warning("Model couldn't be loaded. Check the logs for what happened or report an issue including the model's name.")
|
133 |
return None
|
134 |
progress_bar(0.25, desc="Generating data...",total=100)
|
135 |
+
time0 = time.perf_counter()
|
136 |
for i,output in enumerate(llm_run(model,tokenizer,genes,N)):
|
137 |
+
time_elapsed = time.perf_counter()-time0
|
138 |
+
estimated_time_remaining = int(len(genes)*time_elapsed/(i+1))
|
139 |
+
minutes = str(estimated_time_remaining//60)
|
140 |
+
minutes = "0"*(2-min(2,len(minutes))) + minutes
|
141 |
+
seconds = str(estimated_time_remaining%60)
|
142 |
+
seconds = "0"*(2-min(2,len(seconds))) + seconds
|
143 |
+
progress_bar(0.25 + i*(70/len(genes))/100, desc=f"Generating data... {i+1}/{len(genes)} - estimated remaining time {minutes}:{seconds}",total=100)
|
144 |
return output
|
145 |
|
146 |
def run(path,progress_bar):
|
|
|
180 |
progress_bar(0.95, desc="Saving data ...",total=100)
|
181 |
|
182 |
alleles = [[compl[j]['generated_text'][len(gene):][:4] for j in range(len(compl))] for gene,compl in zip(genes,output)]
|
183 |
+
fsave = False
|
184 |
+
for i in range(10): #Trying to push
|
185 |
+
try:
|
186 |
+
save_git(alleles,genes,path,family)
|
187 |
+
fsave = True
|
188 |
+
break
|
189 |
+
except Exception as e:
|
190 |
+
print(f"Error saving data: {e}")
|
191 |
+
#Recloning the repo
|
192 |
+
try:
|
193 |
+
load_data(force_clone=True)
|
194 |
+
except Exception as e:
|
195 |
+
print(f"Error recloning repo: {e}")
|
196 |
+
if not fsave:
|
197 |
+
gr.Warning("Something went wrong with GitHub and data couldn't be sent to the server. Please check the logs. You can save the data manually by clicking the download button and creating a community post with the file or a pull request on the GitHub repository.")
|
198 |
+
def download_data():
|
199 |
+
d = {'family':family,'alleles':alleles}
|
200 |
+
model_name = path
|
201 |
+
data_path = f'math/{model_name}.json'
|
202 |
+
path = os.path.join('Data',data_path)
|
203 |
+
#create the file folder path
|
204 |
+
if not os.path.exists(os.path.dirname(path)):
|
205 |
+
os.makedirs(os.path.dirname(path), exist_ok=True)
|
206 |
+
#Open the file
|
207 |
+
with open(path,'w') as f:
|
208 |
+
json.dump(d,f)
|
209 |
+
# Provide the download link
|
210 |
+
return gr.File.update(value=path, label="Download data", file_name=f"{model_name}.json")
|
211 |
+
gr.Button("Download data",variant="primary").click(fn=download_data, inputs=[], outputs=None)
|
212 |
+
return None
|
213 |
+
|
214 |
|
215 |
progress_bar(1, desc="Done!",total=100)
|
216 |
|
|
|
261 |
|
262 |
|
263 |
# Load environment variables
|
264 |
+
import dotenv
|
265 |
+
dotenv.load_dotenv()
|
266 |
|
267 |
USERNAME = os.environ['GITHUB_USERNAME']
|
268 |
TOKEN = os.environ['GITHUB_TOKEN']
|
|
|
310 |
"- A similarity matrix (values range from 0 = dissimilar to 1 = highly similar). \n"
|
311 |
"- 2D and 3D scatter plots representing how close or far from each other LLMs are (plotted using UMAP). \n"
|
312 |
"- A tree to visualize distances between models (distance from leaf A to leaf B in the tree is similar to the distance between the two models)\n\n"
|
313 |
+
"Models are colored according to their family (e.g., LLaMA, OPT, Mistral) for the ones that were in the original paper. Models added by users are colored in grey for now. "
|
314 |
)
|
315 |
|
316 |
# Load models for the dropdown
|
|
|
363 |
|
364 |
|
365 |
# Submit model section
|
366 |
+
gr.Markdown("## Submit a Model")
|
367 |
|
368 |
gr.Markdown(
|
369 |
"You may contribute new models to this collaborative space using compute resources. "
|
|
|
382 |
)
|
383 |
|
384 |
with gr.Group():
|
385 |
+
model_input = gr.Textbox(label="Model", interactive=True)
|
386 |
+
submit_btn = gr.Button("Run PhyloLM", variant="primary",interactive=True)
|
387 |
|
388 |
|
389 |
# Disclaimer and citation
|
|
|
427 |
tree_alpha_marker.change(fn=lambda x : slider_changeAlphaMarkers(x,'fig4'), inputs=tree_alpha_marker, outputs=FIGS_OBJECTS)
|
428 |
|
429 |
# Run PhyloLM button
|
430 |
+
run_event = submit_btn.click(fn=prepare_run, inputs=[model_input], outputs=[model_input]).then(fn=reload_env, inputs=[], outputs=FIGS_OBJECTS+ [sim_mat_search_x, sim_mat_search_y, viz_search, tree_search])
|
431 |
+
#cancel_btn.click(fn=None,inputs=None,outputs=None,cancels=[run_event])
|
432 |
|
433 |
#Set more globals
|
434 |
SIM_MAT_SEARCH_X = sim_mat_search_x
|
loading.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import os
|
2 |
import ujson as json
|
3 |
import pygit2
|
|
|
|
|
4 |
|
5 |
from phylogeny import compute_all_P, compute_sim_matrix
|
6 |
from plotting import get_color, UNKNOWN_COLOR, DEFAULT_COLOR
|
@@ -47,12 +49,18 @@ def load_data():
|
|
47 |
|
48 |
return data, model_names, families, sim_matrix, colors
|
49 |
|
50 |
-
def load_git():
|
51 |
cred = pygit2.UserPass(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN'])
|
52 |
-
if os.path.exists('Data'):
|
|
|
|
|
|
|
|
|
|
|
53 |
repo = pygit2.Repository('Data')
|
54 |
remote = repo.remotes['origin'] # Use named reference instead of index
|
55 |
-
remote.fetch()
|
|
|
56 |
|
57 |
# Get the current branch name
|
58 |
branch_name = repo.head.shorthand
|
@@ -62,10 +70,14 @@ def load_git():
|
|
62 |
|
63 |
# Merge the changes into the current branch
|
64 |
remote_commit = repo.lookup_reference(remote_ref_name).target
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
69 |
data_array = []
|
70 |
model_names = []
|
71 |
families = []
|
|
|
1 |
import os
|
2 |
import ujson as json
|
3 |
import pygit2
|
4 |
+
import shutil
|
5 |
+
from pygit2.enums import MergeFavor
|
6 |
|
7 |
from phylogeny import compute_all_P, compute_sim_matrix
|
8 |
from plotting import get_color, UNKNOWN_COLOR, DEFAULT_COLOR
|
|
|
49 |
|
50 |
return data, model_names, families, sim_matrix, colors
|
51 |
|
52 |
+
def load_git(force_clone = False):
|
53 |
cred = pygit2.UserPass(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN'])
|
54 |
+
if not os.path.exists('Data') or force_clone:
|
55 |
+
# Remove the existing directory if it exists
|
56 |
+
if os.path.exists('Data'):
|
57 |
+
shutil.rmtree('Data')
|
58 |
+
repo = pygit2.clone_repository('https://github.com/PhyloLM/Data', './Data', bare=False, callbacks=GitHubRemoteCallbacks(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN']))
|
59 |
+
else:
|
60 |
repo = pygit2.Repository('Data')
|
61 |
remote = repo.remotes['origin'] # Use named reference instead of index
|
62 |
+
fetch_results = remote.fetch()
|
63 |
+
print(fetch_results)
|
64 |
|
65 |
# Get the current branch name
|
66 |
branch_name = repo.head.shorthand
|
|
|
70 |
|
71 |
# Merge the changes into the current branch
|
72 |
remote_commit = repo.lookup_reference(remote_ref_name).target
|
73 |
+
|
74 |
+
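# Merge the fetched remote commit into the current branch. NOTE: no `favor` argument is passed to repo.merge(), so the default merge favor applies (not "theirs"); if merge() raises, the repository is re-cloned below.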
|
75 |
+
try:
|
76 |
+
repo.merge(remote_commit)
|
77 |
+
except Exception as e:
|
78 |
+
print(f"Merge error: {e}")
|
79 |
+
# Redownload the repository if merge fails
|
80 |
+
return load_git(force_clone=True)
|
81 |
data_array = []
|
82 |
model_names = []
|
83 |
families = []
|