init commit
- README.md +31 -35
- modeling.py +3 -2
README.md
CHANGED
@@ -45,15 +45,19 @@ curl https://api.jina.ai/v1/rerank \
   -H "Authorization: Bearer YOUR_API_KEY" \
   -d '{
   "model": "jina-reranker-m0",
-  "query": "
+  "query": "Organic skincare products for sensitive skin",
   "documents": [
-    {"text": "
-    {"text": "
-    {"text": "
-    {"text": "
-    {"text": "
-    {"text": "
-
+    {"text": "Organic skincare for sensitive skin with aloe vera and chamomile."},
+    {"text": "New makeup trends focus on bold colors and innovative techniques"},
+    {"text": "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille"},
+    {"text": "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken"},
+    {"text": "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla"},
+    {"text": "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras"},
+    {"text": "针对敏感肌专门设计的天然有机护肤产品"},
+    {"text": "新的化妆趋势注重鲜艳的颜色和创新的技巧"},
+    {"text": "敏感肌のために特別に設計された天然有機スキンケア製品"},
+    {"text": "新しいメイクのトレンドは鮮やかな色と革新的な技術に焦点を当てています"}
+
   ],
   "top_n": 3
   }'

@@ -79,42 +83,34 @@ model = AutoModel.from_pretrained(
 
 model.to('cuda') # or 'cpu' if no GPU is available
 model.eval()
+
 # Example query and documents
-query = "
+query = "Organic skincare products for sensitive skin"
 documents = [
-    "
-    "
-    "
-    "
-    "
-    "
+    "Organic skincare for sensitive skin with aloe vera and chamomile.",
+    "New makeup trends focus on bold colors and innovative techniques",
+    "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille",
+    "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken",
+    "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla",
+    "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras",
+    "针对敏感肌专门设计的天然有机护肤产品",
+    "新的化妆趋势注重鲜艳的颜色和创新的技巧",
+    "敏感肌のために特別に設計された天然有機スキンケア製品",
+    "新しいメイクのトレンドは鮮やかな色と革新的な技術に焦点を当てています",
 ]
 
 # construct sentence pairs
-
+sentence_pairs = [[query, doc] for doc in documents]
 
-scores = model.compute_score(
+scores = model.compute_score(sentence_pairs, max_length=10240)
 ```
 
 The scores will be a list of floats, where each float represents the relevance score of the corresponding document to the query. Higher scores indicate higher relevance.
 For instance, the returned scores in this case will be:
 ```bash
-[0.
-
-
-
-
-# Example query and documents
-query = "What is the ghibli style?"
-documents = [
-    "https://static0.gamerantimages.com/wordpress/wp-content/uploads/2023/05/doraemon.jpg",
-    "https://static0.gamerantimages.com/wordpress/wp-content/uploads/2024/01/iconic-anime-manga-art-styles-hayao-miyazaki-isao-takahata.jpg",
-]
-
-# construct sentence pairs
-image_pairs = [[query, doc] for doc in documents]
-
-scores = model.compute_score(image_pairs, max_length=2048, doc_type="image")
-# [0.9688562154769897, 0.9886682629585266]
+[0.8311430811882019, 0.09401018172502518,
+ 0.6334102749824524, 0.08269733935594559,
+ 0.7620701193809509, 0.09947021305561066,
+ 0.9263036847114563, 0.05834583938121796,
+ 0.8418256044387817, 0.11124119907617569]
 ```
-
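For readers who want to issue the same rerank request from Python rather than curl, a minimal sketch using the `requests` library is shown below. The endpoint, model name, and payload fields are taken from the curl example above; the `requests` call itself and the `JINA_API_KEY` environment variable are illustrative assumptions, not part of this commit.

```python
# Minimal sketch of the curl example above, issued from Python with `requests`.
# Endpoint, model name, and payload fields come from the README; the environment
# variable name and the lack of error handling are illustrative assumptions.
import os
import requests

response = requests.post(
    "https://api.jina.ai/v1/rerank",
    headers={"Authorization": f"Bearer {os.environ['JINA_API_KEY']}"},
    json={
        "model": "jina-reranker-m0",
        "query": "Organic skincare products for sensitive skin",
        "documents": [
            {"text": "Organic skincare for sensitive skin with aloe vera and chamomile."},
            {"text": "New makeup trends focus on bold colors and innovative techniques"},
            {"text": "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille"},
            {"text": "针对敏感肌专门设计的天然有机护肤产品"},
        ],
        "top_n": 3,
    },
)
response.raise_for_status()
print(response.json())  # response body with the reranked documents and their scores
```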
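The scores returned by `model.compute_score` line up one-to-one with `documents`. A small sketch of how they could be paired back with the documents and cut down to the three most relevant hits, using the exact values listed above; the post-processing itself is illustrative and not part of this commit.

```python
# Hypothetical post-processing: rank the documents by the scores returned above.
# `documents` and `scores` are copied from the README example; the ranking code
# itself is only an illustration.
documents = [
    "Organic skincare for sensitive skin with aloe vera and chamomile.",
    "New makeup trends focus on bold colors and innovative techniques",
    "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille",
    "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken",
    "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla",
    "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras",
    "针对敏感肌专门设计的天然有机护肤产品",
    "新的化妆趋势注重鲜艳的颜色和创新的技巧",
    "敏感肌のために特別に設計された天然有機スキンケア製品",
    "新しいメイクのトレンドは鮮やかな色と革新的な技術に焦点を当てています",
]
scores = [0.8311430811882019, 0.09401018172502518,
          0.6334102749824524, 0.08269733935594559,
          0.7620701193809509, 0.09947021305561066,
          0.9263036847114563, 0.05834583938121796,
          0.8418256044387817, 0.11124119907617569]

# Pair each document with its score and keep the three highest, mirroring the
# API example's top_n=3 behaviour.
top_3 = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)[:3]
for doc, score in top_3:
    print(f"{score:.3f}  {doc}")
# The three skincare descriptions (Chinese, Japanese, English) come out on top.
```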
modeling.py
CHANGED
@@ -10,6 +10,7 @@ from transformers.image_utils import load_image
 
 logger = logging.getLogger(__name__)
 
+LOGIT_SCALE = 0.68
 
 def load_images(images, lazy_load: bool = True):
     # Disable PIL DecompositionBomb threshold for reading large images.

@@ -217,8 +218,8 @@ class JinaVLForRanking(Qwen2VLForConditionalGeneration):
 
         scores = self.forward(**batch).view(-1).cpu().float().numpy()
 
-        # normalize scores to [0, 1] with sigmoid
-        scores = 1.0 / (1.0 + np.exp(-scores))
+        # normalize scores to [0, 1] with a scaled sigmoid
+        scores = 1.0 / (1.0 + np.exp(-scores * LOGIT_SCALE))
 
         all_scores.extend(scores.tolist())
 
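The only behavioural change in modeling.py is that the raw logits are now multiplied by LOGIT_SCALE = 0.68 before the sigmoid. A small standalone sketch of the effect on a few made-up logit values; only the formula and the constant come from this commit, the sample logits are illustrative.

```python
# Compare the old plain sigmoid with the new scaled sigmoid from this commit.
import numpy as np

LOGIT_SCALE = 0.68  # constant introduced by this commit

raw = np.array([-3.0, -1.0, 0.0, 1.0, 3.0])        # illustrative raw logits
plain = 1.0 / (1.0 + np.exp(-raw))                  # old: plain sigmoid
scaled = 1.0 / (1.0 + np.exp(-raw * LOGIT_SCALE))   # new: scaled sigmoid

for r, p, s in zip(raw, plain, scaled):
    print(f"logit {r:+.1f}: old {p:.3f} -> new {s:.3f}")
# With LOGIT_SCALE < 1 the curve is gentler, so normalized scores cluster less
# tightly around 0 and 1 than with the unscaled sigmoid.
```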