flytoe committed on
Commit
5665c6b
·
verified ·
1 Parent(s): b6a1553

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,12 +7,12 @@ model_name = "allenai/scibert_scivocab_uncased"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3) # z.B. für 3 Kategorien
9
 
10
- # 2️⃣ Dataset laden (ersetze mit deinem Dataset)
11
- dataset = load_dataset("scientific_papers", "arxiv") # Hugging Face Datasets
12
 
13
  # 3️⃣ Tokenisierung der Texte
14
  def tokenize_function(examples):
15
- return tokenizer(examples["abstract"], padding="max_length", truncation=True)
16
 
17
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
18
 
 
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3) # z.B. für 3 Kategorien
9
 
10
+ # 2️⃣ Dataset laden (armanc/scientific_papers)
11
+ dataset = load_dataset("armanc/scientific_papers")
12
 
13
  # 3️⃣ Tokenisierung der Texte
14
  def tokenize_function(examples):
15
+ return tokenizer(examples["text"], padding="max_length", truncation=True)
16
 
17
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
18