File size: 1,266 Bytes
dbd33b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from elasticsearch import Elasticsearch
import uuid

class ElasticsearchHandler:
    """Store text/embedding pairs in Elasticsearch and query them by vector.

    Each document is indexed with two fields, ``text`` and ``embedding``.
    ``search`` scores every document in an index with a cosine-similarity
    script against a query vector and returns the best-matching texts.
    """

    def __init__(self, host='localhost', port=9200):
        """Create the underlying Elasticsearch client.

        Args:
            host: Host name of the Elasticsearch node.
            port: Port of the Elasticsearch node.
        """
        self.es = Elasticsearch([{'host': host, 'port': port}])

    @staticmethod
    def _as_list(vector):
        """Return *vector* as a plain list that can be JSON-serialised.

        Array-like objects exposing ``tolist()`` are converted through it;
        any other iterable (list, tuple, ...) is copied with ``list()``.
        """
        to_list = getattr(vector, 'tolist', None)
        return to_list() if callable(to_list) else list(vector)

    def create_index(self, index_name, *, embedding_dims=None):
        """Create *index_name* unless it already exists.

        Args:
            index_name: Name of the index to create.
            embedding_dims: Optional length of the vectors that will be
                stored. When given, the index is created with an explicit
                mapping declaring ``embedding`` as a ``dense_vector`` of that
                size, which is the field type the ``cosineSimilarity`` script
                in ``search`` works on. When omitted, the index is created
                without any mapping.
        """
        if self.es.indices.exists(index=index_name):
            return

        if embedding_dims is None:
            self.es.indices.create(index=index_name)
            return

        # NOTE(review): mapping syntax follows the dense_vector field type
        # documentation; confirm against the deployed Elasticsearch version.
        mappings = {
            'properties': {
                'text': {'type': 'text'},
                'embedding': {'type': 'dense_vector', 'dims': embedding_dims},
            }
        }
        self.es.indices.create(index=index_name, body={'mappings': mappings})

    def index_document(self, index_name, doc_id, text, embedding):
        """Index one document holding *text* and its *embedding*.

        Args:
            index_name: Index to write to.
            doc_id: Identifier to store the document under.
            text: Text payload of the document.
            embedding: Vector for the text; either an array-like with a
                ``tolist()`` method or a plain sequence of numbers.
        """
        body = {
            'text': text,
            'embedding': self._as_list(embedding),
        }
        self.es.index(index=index_name, id=doc_id, body=body)

    def search(self, index_name, query_vector, top_k=5):
        """Return the texts of the *top_k* documents closest to *query_vector*.

        Args:
            index_name: Index to search.
            query_vector: Query vector; either an array-like with a
                ``tolist()`` method or a plain sequence of numbers.
            top_k: Maximum number of hits to request.

        Returns:
            A list with the ``text`` field of each hit, in the order the
            hits appear in the response.
        """
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    # The + 1.0 offset presumably keeps the score non-negative
                    # (cosine similarity ranges over [-1, 1]).
                    "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                    "params": {"query_vector": self._as_list(query_vector)},
                },
            }
        }
        response = self.es.search(
            index=index_name,
            body={
                "size": top_k,
                "query": script_query,
                # Only the text is returned to callers, so skip the vectors.
                "_source": {"includes": ["text"]},
            },
        )
        return [hit["_source"]["text"] for hit in response["hits"]["hits"]]