Commit dc83bf2 · Parent: 114995f
Committed by Daetheys

Fixed git push issue
Files changed (2)
  1. app.py +56 -14
  2. loading.py +19 -7
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 import os
 import numpy as np
 import ujson as json
+import time
+from threading import Thread
 
 from loading import load_data, save_git
 from tools import compute_ordered_matrix
@@ -101,8 +103,8 @@ def search_bar_gr(model_names,slider=True,double_search=False,key=None):
     ret.insert(0,col2)
     return ret
 
-import spaces
-@spaces.GPU(duration=300)
+#import spaces
+#@spaces.GPU(duration=300)
 def _run(path,genes,N,progress_bar):
     #Load the model
     progress_bar(0.20, desc="Loading Model...",total=100)
@@ -110,28 +112,35 @@ def _run(path,genes,N,progress_bar):
         model,tokenizer = load_model(path)
     except ValueError as e:
         print(f"Error loading model '{path}': {e}")
-        gr.Warning("Model couldn't load. This space currently only works with AutoModelForCausalLM models and trust_remote_code=False. Please check the model architecture and whether it requires the execution of custom code and try again.")
+        gr.Warning("Model couldn't load. This space currently only works with AutoModelForCausalLM models and for security reasons cannot execute remote code. Please check the model architecture and whether it is too recent and requires the execution of custom code.")
         return None
     except OSError as e:
         print(f"Error loading model '{path}': {e}")
-        gr.Warning("Model doesn't seem to exist on the HuggingFace Hub or might be gated. Please check the model name and try again.")
+        gr.Warning("Model doesn't seem to exist on the HuggingFace Hub or might be gated. Please check the model name and its accessibility.")
         return None
     except RuntimeError as e:
         if 'out of memory' in str(e):
             print(f"Error loading model '{path}': {e}")
-            gr.Warning("Loading the model triggered an out of memory error. It may be too big for the GPU (80GB RAM). Please try again with a smaller model.")
+            gr.Warning("Loading the model triggered an out of memory error. It may be too big for the GPU (80GB RAM max). Please verify the size of the model.")
             return None
         else:
             print(f"Error loading model '{path}': {e}")
-            gr.Warning("Model couldn't be loaded. Please check the logs or report an issue.")
+            gr.Warning("Model couldn't be loaded. Check the logs for what happened or report an issue including the model's name.")
            return None
     except Exception as e:
         print(f"Error loading model '{path}': {e}")
-        gr.Warning("Model couldn't be loaded. Please check the logs or report an issue.")
+        gr.Warning("Model couldn't be loaded. Check the logs for what happened or report an issue including the model's name.")
         return None
     progress_bar(0.25, desc="Generating data...",total=100)
+    time0 = time.perf_counter()
     for i,output in enumerate(llm_run(model,tokenizer,genes,N)):
-        progress_bar(0.25 + i*(70/len(genes))/100, desc=f"Generating data... {i+1}/{len(genes)}",total=100)
+        time_elapsed = time.perf_counter()-time0
+        estimated_time_remaining = int(len(genes)*time_elapsed/(i+1))
+        minutes = str(estimated_time_remaining//60)
+        minutes = "0"*(2-min(2,len(minutes))) + minutes
+        seconds = str(estimated_time_remaining%60)
+        seconds = "0"*(2-min(2,len(seconds))) + seconds
+        progress_bar(0.25 + i*(70/len(genes))/100, desc=f"Generating data... {i+1}/{len(genes)} - estimated remaining time {minutes}:{seconds}",total=100)
     return output
 
 def run(path,progress_bar):
@@ -171,7 +180,37 @@ def run(path,progress_bar):
     progress_bar(0.95, desc="Saving data ...",total=100)
 
     alleles = [[compl[j]['generated_text'][len(gene):][:4] for j in range(len(compl))] for gene,compl in zip(genes,output)]
-    save_git(alleles,genes,path,family)
+    fsave = False
+    for i in range(10): #Trying to push
+        try:
+            save_git(alleles,genes,path,family)
+            fsave = True
+            break
+        except Exception as e:
+            print(f"Error saving data: {e}")
+            #Recloning the repo
+            try:
+                load_data(force_clone=True)
+            except Exception as e:
+                print(f"Error recloning repo: {e}")
+    if not fsave:
+        gr.Warning("Something went wrong with GitHub and data couldn't be sent to the server. Please check the logs. You can save the data manually by clicking the download button and creating a community post with the file or a pull request on the GitHub repository.")
+        def download_data():
+            d = {'family':family,'alleles':alleles}
+            model_name = path
+            data_path = f'math/{model_name}.json'
+            path = os.path.join('Data',data_path)
+            #create the file folder path
+            if not os.path.exists(os.path.dirname(path)):
+                os.makedirs(os.path.dirname(path), exist_ok=True)
+            #Open the file
+            with open(path,'w') as f:
+                json.dump(d,f)
+            # Provide the download link
+            return gr.File.update(value=path, label="Download data", file_name=f"{model_name}.json")
+        gr.Button("Download data",variant="primary").click(fn=download_data, inputs=[], outputs=None)
+        return None
+
 
     progress_bar(1, desc="Done!",total=100)
 
@@ -222,6 +261,8 @@ def reload_env():
 
 
 # Load environment variables
+import dotenv
+dotenv.load_dotenv()
 
 USERNAME = os.environ['GITHUB_USERNAME']
 TOKEN = os.environ['GITHUB_TOKEN']
@@ -269,7 +310,7 @@ with gr.Blocks(title="PhyloLM", theme=gr.themes.Default()) as demo:
        "- A similarity matrix (values range from 0 = dissimilar to 1 = highly similar). \n"
        "- 2D and 3D scatter plots representing how close or far from each other LLMs are (plotted using UMAP). \n"
        "- A tree to visualize distances between models (distance from leaf A to leaf B in the tree is similar to the distance between the two models)\n\n"
-        "Models are colored according to their family (e.g., LLaMA, OPT, Mistral) for the ones that were in the original paper. Newly added models by users will be colored in grey. "
+        "Models are colored according to their family (e.g., LLaMA, OPT, Mistral) for the ones that were in the original paper. Models added by users are colored in grey for now. "
    )
 
    # Load models for the dropdown
@@ -322,7 +363,7 @@ with gr.Blocks(title="PhyloLM", theme=gr.themes.Default()) as demo:
 
 
    # Submit model section
-    gr.Markdown("## Submitting a Model")
+    gr.Markdown("## Submit a Model")
 
    gr.Markdown(
        "You may contribute new models to this collaborative space using compute resources. "
@@ -341,8 +382,8 @@ with gr.Blocks(title="PhyloLM", theme=gr.themes.Default()) as demo:
    )
 
    with gr.Group():
-        model_input = gr.Textbox(label="Model", interactive=False)
-        submit_btn = gr.Button("Run PhyloLM", variant="primary",interactive=False)
+        model_input = gr.Textbox(label="Model", interactive=True)
+        submit_btn = gr.Button("Run PhyloLM", variant="primary",interactive=True)
 
 
    # Disclaimer and citation
@@ -386,7 +427,8 @@ url={https://openreview.net/forum?id=rTQNGQxm4K}
    tree_alpha_marker.change(fn=lambda x : slider_changeAlphaMarkers(x,'fig4'), inputs=tree_alpha_marker, outputs=FIGS_OBJECTS)
 
    # Run PhyloLM button
-    submit_btn.click(fn=prepare_run, inputs=[model_input], outputs=[model_input]).then(fn=reload_env, inputs=[], outputs=FIGS_OBJECTS+ [sim_mat_search_x, sim_mat_search_y, viz_search, tree_search])
+    run_event = submit_btn.click(fn=prepare_run, inputs=[model_input], outputs=[model_input]).then(fn=reload_env, inputs=[], outputs=FIGS_OBJECTS+ [sim_mat_search_x, sim_mat_search_y, viz_search, tree_search])
+    #cancel_btn.click(fn=None,inputs=None,outputs=None,cancels=[run_event])
 
    #Set more globals
    SIM_MAT_SEARCH_X = sim_mat_search_x
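
The push fix in run() boils down to a retry loop around save_git that re-clones the data repository between attempts and only falls back to a manual download when every attempt fails. A minimal sketch of that pattern, assuming the save step and the re-clone step are passed in as callables (push_with_retry, save_fn and reclone_fn are illustrative names, not part of this repository):

```python
def push_with_retry(save_fn, reclone_fn, attempts=10):
    """Retry save_fn up to `attempts` times, re-cloning the repo after each failure."""
    for attempt in range(attempts):
        try:
            save_fn()            # e.g. save_git(alleles, genes, path, family)
            return True          # push succeeded
        except Exception as e:
            print(f"Push attempt {attempt + 1} failed: {e}")
            try:
                reclone_fn()     # e.g. reloading the Data repo with force_clone=True
            except Exception as e2:
                print(f"Re-clone failed: {e2}")
    return False                 # caller shows a warning and offers the JSON download
```

Returning a boolean plays the role of the fsave flag in the diff: the Gradio handler keeps its UI logic (warning plus download button) separate from the retry mechanics.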
loading.py CHANGED
@@ -1,6 +1,8 @@
 import os
 import ujson as json
 import pygit2
+import shutil
+from pygit2.enums import MergeFavor
 
 from phylogeny import compute_all_P, compute_sim_matrix
 from plotting import get_color, UNKNOWN_COLOR, DEFAULT_COLOR
@@ -47,12 +49,18 @@ def load_data():
 
    return data, model_names, families, sim_matrix, colors
 
-def load_git():
+def load_git(force_clone = False):
    cred = pygit2.UserPass(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN'])
-    if os.path.exists('Data'):
+    if not os.path.exists('Data') or force_clone:
+        # Remove the existing directory if it exists
+        if os.path.exists('Data'):
+            shutil.rmtree('Data')
+        repo = pygit2.clone_repository('https://github.com/PhyloLM/Data', './Data', bare=False, callbacks=GitHubRemoteCallbacks(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN']))
+    else:
        repo = pygit2.Repository('Data')
        remote = repo.remotes['origin'] # Use named reference instead of index
-        remote.fetch()
+        fetch_results = remote.fetch()
+        print(fetch_results)
 
        # Get the current branch name
        branch_name = repo.head.shorthand
@@ -62,10 +70,14 @@ def load_git():
 
        # Merge the changes into the current branch
        remote_commit = repo.lookup_reference(remote_ref_name).target
-
-    else:
-        repo = pygit2.clone_repository('https://github.com/PhyloLM/Data', './Data', bare=False, callbacks=GitHubRemoteCallbacks(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN']))
-
+
+        #Resolve conflicts if any : strategy : theirs
+        try:
+            repo.merge(remote_commit)
+        except Exception as e:
+            print(f"Merge error: {e}")
+            # Redownload the repository if merge fails
+            return load_git(force_clone=True)
    data_array = []
    model_names = []
    families = []
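
The force_clone path added to load_git follows a common pygit2 recovery pattern: try to fetch and merge the existing checkout, and fall back to deleting the directory and cloning again when anything goes wrong. A self-contained sketch of that idea (the URL, directory and the sync_repo name are placeholders for illustration, not code from this Space):

```python
import os
import shutil
import pygit2

def sync_repo(url, path, username, token):
    """Fetch and merge an existing clone; wipe and re-clone if the update fails."""
    callbacks = pygit2.RemoteCallbacks(credentials=pygit2.UserPass(username, token))
    try:
        repo = pygit2.Repository(path)                 # raises if `path` is not a valid repo
        remote = repo.remotes['origin']
        remote.fetch(callbacks=callbacks)              # update refs/remotes/origin/*
        branch = repo.head.shorthand                   # e.g. 'main'
        remote_ref = repo.lookup_reference(f'refs/remotes/origin/{branch}')
        repo.merge(remote_ref.target)                  # may raise or leave conflicts to resolve
        return repo
    except Exception as e:
        print(f"Update failed, re-cloning: {e}")
        if os.path.exists(path):
            shutil.rmtree(path)                        # drop the broken checkout
        return pygit2.clone_repository(url, path, callbacks=callbacks)
```

Wiping the working directory before re-cloning guarantees a clean state, at the cost of discarding any uncommitted local changes in the data checkout.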