Bor Hodošček committed on
Commit
d84cd62
·
unverified ·
1 Parent(s): 4d52104

feat: remove debug

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -160,7 +160,9 @@ def _(doc, mo, pl):
160
  str(token.morph) for token in doc
161
  ], # To be more precise, this should be merged back in via .to_dict()
162
  "Token Position": list(range(len(doc))),
163
- "Sentence Number": [i for i, sent in enumerate(doc.sents) for token in sent],
 
 
164
  }
165
  )
166
 
@@ -367,7 +369,6 @@ def _(hashlib):
367
  @app.function
368
  def fix_token(token: str) -> str:
369
  """Fix token for display with improved space visualization."""
370
- print(token)
371
  # Replace SentencePiece space marker U+2581 with a middle dot
372
  token = token.replace(" ", "·")
373
  # Replace BPE space marker 'Ġ' with a middle dot
@@ -388,7 +389,6 @@ def get_tokenizer_info(tokenizer):
388
  Extract useful information from a tokenizer.
389
  Returns a dictionary with tokenizer details.
390
  """
391
- print(tokenizer)
392
 
393
  info = {}
394
  try:
@@ -485,7 +485,6 @@ def _(
485
  # Tokenize the input text
486
  # Use tokenize to get string representations for analysis and display
487
  all_tokens = tokenizer.tokenize(current_text)
488
- print(all_tokens)
489
  total_token_count = len(all_tokens)
490
 
491
  # Limit the number of tokens for display to avoid browser slowdown
@@ -528,15 +527,19 @@ def _(
528
  style = f"background-color: {item['colors']['background']}; color: {item['colors']['text']}; padding: 1px 3px; margin: 1px; border-radius: 3px; display: inline-block; white-space: pre-wrap; line-height: 1.4;"
529
  # Add title attribute for hover info (original token + ID)
530
  title = f"Original: {item['original']}\nID: {item['token_id']}"
531
- display_content = str(item["token_id"]) if show_ids_switch.value else item["display"]
 
 
532
  html_parts.append(
533
  f'<span style="{style}" title="{title}">{display_content}</span>'
534
  )
535
 
536
- token_viz_html = mo.Html(f'<div style="line-height: 1.6;">{"".join(html_parts)}</div>')
 
 
537
 
538
- basic_stats = token_stats['basic_stats']
539
- length_stats = token_stats['length_stats']
540
 
541
  basic_stats_md = "**Basic Stats:**\n\n" + "\n".join(
542
  f"- **{key.replace('_', ' ').title()}:** `{value}`"
 
160
  str(token.morph) for token in doc
161
  ], # To be more precise, this should be merged back in via .to_dict()
162
  "Token Position": list(range(len(doc))),
163
+ "Sentence Number": [
164
+ i for i, sent in enumerate(doc.sents) for token in sent
165
+ ],
166
  }
167
  )
168
 
 
369
  @app.function
370
  def fix_token(token: str) -> str:
371
  """Fix token for display with improved space visualization."""
 
372
  # Replace SentencePiece space marker U+2581 with a middle dot
373
  token = token.replace(" ", "·")
374
  # Replace BPE space marker 'Ġ' with a middle dot
 
389
  Extract useful information from a tokenizer.
390
  Returns a dictionary with tokenizer details.
391
  """
 
392
 
393
  info = {}
394
  try:
 
485
  # Tokenize the input text
486
  # Use tokenize to get string representations for analysis and display
487
  all_tokens = tokenizer.tokenize(current_text)
 
488
  total_token_count = len(all_tokens)
489
 
490
  # Limit the number of tokens for display to avoid browser slowdown
 
527
  style = f"background-color: {item['colors']['background']}; color: {item['colors']['text']}; padding: 1px 3px; margin: 1px; border-radius: 3px; display: inline-block; white-space: pre-wrap; line-height: 1.4;"
528
  # Add title attribute for hover info (original token + ID)
529
  title = f"Original: {item['original']}\nID: {item['token_id']}"
530
+ display_content = (
531
+ str(item["token_id"]) if show_ids_switch.value else item["display"]
532
+ )
533
  html_parts.append(
534
  f'<span style="{style}" title="{title}">{display_content}</span>'
535
  )
536
 
537
+ token_viz_html = mo.Html(
538
+ f'<div style="line-height: 1.6;">{"".join(html_parts)}</div>'
539
+ )
540
 
541
+ basic_stats = token_stats["basic_stats"]
542
+ length_stats = token_stats["length_stats"]
543
 
544
  basic_stats_md = "**Basic Stats:**\n\n" + "\n".join(
545
  f"- **{key.replace('_', ' ').title()}:** `{value}`"