Spaces:
Running
Running
File size: 1,266 Bytes
dbd33b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
from elasticsearch import Elasticsearch
import uuid
class ElasticsearchHandler:
    """Small helper around an Elasticsearch client for text + embedding documents.

    Documents are stored with two fields, ``text`` and ``embedding``, and are
    retrieved by cosine similarity between a query vector and ``embedding``.
    """

    def __init__(self, host='localhost', port=9200):
        """Create the underlying client.

        Args:
            host: Hostname of the Elasticsearch node.
            port: Port of the Elasticsearch node.
        """
        self.es = Elasticsearch([{'host': host, 'port': port}])

    @staticmethod
    def _to_list(vector):
        """Return *vector* as a plain list that can be serialized to JSON.

        Objects exposing ``tolist()`` (e.g. array types) are converted through
        it; any other iterable (list, tuple, ...) is copied with ``list()``.
        """
        to_list = getattr(vector, "tolist", None)
        if callable(to_list):
            return to_list()
        return list(vector)

    def create_index(self, index_name, embedding_dim=None):
        """Create *index_name* if it does not exist yet.

        Args:
            index_name: Name of the index to create.
            embedding_dim: Optional dimensionality of the stored vectors. When
                given, the ``embedding`` field is declared as ``dense_vector``
                with that many dimensions, which the ``cosineSimilarity``
                script used by :meth:`search` requires. When omitted, the
                index is created without an explicit mapping (the previous
                behavior), so the mapping must be provided some other way
                before :meth:`search` can score documents.
        """
        if self.es.indices.exists(index=index_name):
            return

        if embedding_dim is None:
            self.es.indices.create(index=index_name)
            return

        # Only the vector field is declared; ``text`` is left to dynamic
        # mapping so its mapping is the same as without ``embedding_dim``.
        mapping_body = {
            "mappings": {
                "properties": {
                    "embedding": {
                        "type": "dense_vector",
                        "dims": embedding_dim,
                    },
                },
            },
        }
        self.es.indices.create(index=index_name, body=mapping_body)

    def index_document(self, index_name, doc_id, text, embedding):
        """Store one document under *doc_id* in *index_name*.

        Args:
            index_name: Target index.
            doc_id: Identifier to store the document under.
            text: Text payload, stored in the ``text`` field.
            embedding: Vector for the document; either an object with a
                ``tolist()`` method or a plain sequence of numbers.
        """
        body = {
            'text': text,
            'embedding': self._to_list(embedding),
        }
        self.es.index(index=index_name, id=doc_id, body=body)

    def search(self, index_name, query_vector, top_k=5):
        """Return the ``text`` of the *top_k* documents closest to *query_vector*.

        Args:
            index_name: Index to search.
            query_vector: Query embedding; either an object with a
                ``tolist()`` method or a plain sequence of numbers.
            top_k: Maximum number of hits to request.

        Returns:
            A list with the ``text`` field of each hit, in the order the
            hits appear in the response.
        """
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    # Cosine similarity lies in [-1, 1]; the +1.0 shifts it
                    # to [0, 2] so the resulting score is never negative.
                    "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                    "params": {"query_vector": self._to_list(query_vector)},
                },
            },
        }
        response = self.es.search(
            index=index_name,
            body={
                "size": top_k,
                "query": script_query,
                # Only the text is returned to the caller, so skip the vectors.
                "_source": {"includes": ["text"]},
            },
        )
        return [hit["_source"]["text"] for hit in response["hits"]["hits"]]