nouamanetazi (HF Staff) committed
Commit 8cafaac · verified · 1 Parent(s): c45c066

Update app.py

Files changed (1): app.py (+26, −16)
app.py CHANGED
@@ -68,7 +68,7 @@ scheduler = CommitScheduler(
     repo_type="dataset",
     folder_path=submit_file.parent,
     path_in_repo="data",
-    every=5,
+    every=1,
     token=token
 )
 logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
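
Note: `every` in `huggingface_hub.CommitScheduler` is a number of minutes between background commits, so this change pushes collected records roughly once a minute instead of every five. A minimal standalone sketch of the same configuration (the token lookup and local folder here are assumptions; the app itself uses `submit_file.parent` and its own `token`):

import os
from pathlib import Path
from huggingface_hub import CommitScheduler

# Sketch of the scheduler configured above; `every` is minutes, not seconds.
scheduler = CommitScheduler(
    repo_id="atlasia/atlaset_inference_ds",  # repo named in the log line above
    repo_type="dataset",
    folder_path=Path("data"),                # assumed; the app uses submit_file.parent
    path_in_repo="data",
    every=1,                                 # commit roughly once a minute
    token=os.environ.get("HF_TOKEN"),        # assumed token source
)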
@@ -79,9 +79,8 @@ usage_stats = {
     "total_tokens_generated": 0,
     "start_time": time.time()
 }
-
 @spaces.GPU
-def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
+def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5, progress=gr.Progress()):
     if not prompt.strip():
         logger.warning("Empty prompt submitted")
         return "", "الرجاء إدخال نص للتوليد (Please enter text to generate)"
@@ -91,25 +90,34 @@ def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150,
 
     start_time = time.time()
 
+    # Start progress
+    progress(0, desc="تجهيز النموذج (Preparing model)")
+
     # Tokenize input
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    progress(0.1, desc="تحليل النص (Tokenizing)")
 
     # Generate text
-    output = model.generate(
-        **inputs,
-        max_length=max_length,
-        temperature=temperature,
-        top_p=top_p,
-        do_sample=True,
-        repetition_penalty=repetition_penalty,
-        num_beams=num_beams,
-        top_k=top_k,
-        early_stopping=True,
-        pad_token_id=tokenizer.pad_token_id,
-        eos_token_id=tokenizer.eos_token_id,
-    )
+    # Since we can't track token generation directly, we'll create artificial steps
+    steps = 10  # Divide generation into 10 steps
+    for i in progress.tqdm(range(steps), desc="توليد النص (Generating text)"):
+        if i == 0:  # Only generate on the first step
+            output = model.generate(
+                **inputs,
+                max_length=max_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                repetition_penalty=repetition_penalty,
+                num_beams=num_beams,
+                top_k=top_k,
+                early_stopping=True,
+                pad_token_id=tokenizer.pad_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+            )
 
     # Decode output
+    progress(0.9, desc="معالجة النتائج (Processing results)")
     result = tokenizer.decode(output[0], skip_special_tokens=True)
 
     # Update stats
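
Note: the loop above calls model.generate() only on its first iteration, so the bar blocks at the first tick for the whole generation and then races through the remaining nine steps. For real token-level progress, a TextIteratorStreamer is one alternative; a hedged sketch, assuming the app's model and tokenizer (streamers do not support num_beams > 1, so this drops beam search):

from threading import Thread
from transformers import TextIteratorStreamer

def generate_with_real_progress(prompt, max_length=256, progress=None):
    # Stream decoded chunks as they are produced and map the count onto the bar.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    kwargs = dict(**inputs, max_length=max_length, do_sample=True, streamer=streamer)
    thread = Thread(target=model.generate, kwargs=kwargs)
    thread.start()
    pieces = []
    for i, piece in enumerate(streamer):  # yields text chunks, roughly one per token
        pieces.append(piece)
        if progress is not None:
            progress(min(0.1 + 0.8 * i / max_length, 0.9), desc="Generating text")
    thread.join()
    return "".join(pieces)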
@@ -140,6 +148,8 @@ def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150,
         }
     )
 
+    progress(1.0, desc="اكتمل (Complete)")
+
     return result, f"تم توليد {token_count} رمز في {generation_time:.2f} ثانية (Generated {token_count} tokens in {generation_time:.2f} seconds)"
 
 def save_feedback(input, output, params) -> None:
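
Note: because progress=gr.Progress() sits in the function's defaults, Gradio injects the tracker itself; UI callers never pass it. A hedged sketch of the wiring (the component labels are assumptions, not the app's actual layout; parameters beyond the listed inputs fall back to their defaults):

import gradio as gr

demo = gr.Interface(
    fn=generate_text,  # the function patched above
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(32, 1024, value=256, step=32, label="max_length"),
    ],
    outputs=[
        gr.Textbox(label="Generated text"),
        gr.Textbox(label="Status"),
    ],
)
demo.queue().launch()  # queueing is required for progress bars to render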
 