flytoe committed on
Commit
cbf6c96
·
verified ·
1 Parent(s): 849f804

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,10 +7,10 @@ model_name = "allenai/scibert_scivocab_uncased"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
9
 
10
- # 2️⃣ Dataset laden (armanc/scientific_papers) mit trust_remote_code=True
11
- dataset = load_dataset("armanc/scientific_papers", trust_remote_code=True)
12
 
13
- # 3️⃣ Tokenisierung der Texte (hier wird die Spalte "text" genutzt; ggf. anpassen, falls andere Spalten vorhanden sind)
14
  def tokenize_function(examples):
15
  return tokenizer(examples["text"], padding="max_length", truncation=True)
16
 
 
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
9
 
10
+ # 2️⃣ Dataset laden (mit spezifischer Konfiguration: "arxiv" oder "pubmed")
11
+ dataset = load_dataset("armanc/scientific_papers", "arxiv", trust_remote_code=True) # Oder "pubmed"
12
 
13
+ # 3️⃣ Tokenisierung der Texte
14
  def tokenize_function(examples):
15
  return tokenizer(examples["text"], padding="max_length", truncation=True)
16