Spaces:
Sleeping
Sleeping
File size: 717 Bytes
b18a9b9 ffb2a92 b18a9b9 ffb2a92 b18a9b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
# Encode every paper title listed in anlp2025.tsv with a sentence-embedding
# model and store the resulting matrix in anlp2025.npz.
#
# The expected values below pin the exact dataset/model this script was written
# for. They are enforced with explicit exceptions rather than `assert`, because
# asserts are removed when Python runs with -O and a bad input would then be
# embedded and saved without any warning.
EXPECTED_PAPER_COUNT = 777
EXPECTED_EMBEDDING_DIM = 1792
EXPECTED_FIRST_TITLE = "LLMのアテンションヘッドに着目したジェイルブレイク攻撃の分析と防御手法の提案"
EXPECTED_LAST_TITLE = "ニュース記事中の企業名のEntity LinkingにおけるQuestion Answeringを用いた曖昧性解消"

# Column names are supplied here, so the first line of the TSV is read as data
# (the file is expected to have no header row).
paper_df = pd.read_csv('anlp2025.tsv', names=["pid", "title"], sep="\t")
if len(paper_df) != EXPECTED_PAPER_COUNT:
    raise ValueError(
        f"anlp2025.tsv: expected {EXPECTED_PAPER_COUNT} rows, got {len(paper_df)}"
    )

# `tolist()` already returns a fresh Python list; no extra copy is needed.
input_texts = paper_df["title"].tolist()

# Spot-check both ends of the file to detect a reordered or truncated input.
if input_texts[0] != EXPECTED_FIRST_TITLE:
    raise ValueError(f"anlp2025.tsv: unexpected first title: {input_texts[0]!r}")
if input_texts[-1] != EXPECTED_LAST_TITLE:
    raise ValueError(f"anlp2025.tsv: unexpected last title: {input_texts[-1]!r}")

model = SentenceTransformer("sbintuitions/sarashina-embedding-v1-1b")
embeddings = model.encode(input_texts)

# One row per title, one column per embedding dimension.
expected_shape = (EXPECTED_PAPER_COUNT, EXPECTED_EMBEDDING_DIM)
if embeddings.shape != expected_shape:
    raise ValueError(
        f"unexpected embeddings shape: expected {expected_shape}, got {embeddings.shape}"
    )

# np.savez appends the ".npz" extension; a positional array is stored under
# the default key "arr_0", which readers of the file rely on.
np.savez("anlp2025", embeddings)
|