Spaces:

anpigon
/

langchain-qa-bot

Runtime error

App Files Files Community

langchain-qa-bot / docs /langchain /libs /community /langchain_community /embeddings /johnsnowlabs.py

anpigon

add langchain docs

ed4d993 11 months ago

raw

history blame

2.87 kB

	import os
	import sys
	from typing import Any, List

	from langchain_core.embeddings import Embeddings
	from langchain_core.pydantic_v1 import BaseModel, Extra


	class JohnSnowLabsEmbeddings(BaseModel, Embeddings):
	    """JohnSnowLabs embedding models.

	    To use, you should have the ``johnsnowlabs`` python package installed.

	    Example:
	        .. code-block:: python

	            from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings

	            embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert')
	            output = embedding.embed_query("foo bar")
	    """  # noqa: E501

	    # Holds the loaded pipeline after ``__init__`` runs; the string default
	    # is only the name handed to ``nlp.load`` when no model is supplied.
	    model: Any = "embed_sentence.bert"

	    def __init__(
	        self,
	        model: Any = "embed_sentence.bert",
	        hardware_target: str = "cpu",
	        **kwargs: Any,
	    ):
	        """Initialize the johnsnowlabs model.

	        Args:
	            model: The model to embed with. A ``str`` is passed to
	                ``nlp.load``; an ``NLUPipeline`` instance is used as given;
	                any other object is handed to ``nlp.to_nlu_pipe``.
	            hardware_target: Forwarded to ``nlp.start``. Defaults to ``"cpu"``.
	            **kwargs: Forwarded to the pydantic ``BaseModel`` initializer.

	        Raises:
	            ImportError: If ``johnsnowlabs`` / ``nlu`` cannot be imported.
	            Exception: If starting the Spark session or loading the model
	                fails; the underlying error is chained as ``__cause__``.
	        """
	        super().__init__(**kwargs)
	        # 1) Check imports
	        try:
	            from johnsnowlabs import nlp
	            from nlu.pipe.pipeline import NLUPipeline
	        except ImportError as exc:
	            raise ImportError(
	                "Could not import johnsnowlabs python package. "
	                "Please install it with `pip install johnsnowlabs`."
	            ) from exc

	        # 2) Start a Spark Session
	        try:
	            # Point both PySpark interpreter variables at the running Python
	            # executable before the session is started.
	            os.environ["PYSPARK_PYTHON"] = sys.executable
	            os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
	            nlp.start(hardware_target=hardware_target)
	        except Exception as exc:
	            raise Exception("Failure starting Spark Session") from exc

	        # 3) Load the model
	        try:
	            if isinstance(model, str):
	                self.model = nlp.load(model)
	            elif isinstance(model, NLUPipeline):
	                self.model = model
	            else:
	                # Neither a name nor a ready pipeline: let johnsnowlabs
	                # convert the object into an NLU pipeline.
	                self.model = nlp.to_nlu_pipe(model)
	        except Exception as exc:
	            raise Exception("Failure loading model") from exc

	    class Config:
	        """Configuration for this pydantic object."""

	        extra = Extra.forbid

	    def embed_documents(self, texts: List[str]) -> List[List[float]]:
	        """Compute doc embeddings using a JohnSnowLabs transformer model.

	        Args:
	            texts: The list of texts to embed.

	        Returns:
	            List of embeddings, one for each text. An empty list or tuple
	            yields ``[]`` without invoking the pipeline.

	        Raises:
	            KeyError: If the prediction output has no column whose name
	                contains ``"embedding"``.
	        """
	        # Nothing to embed: avoid a pointless pipeline call. Restricted to
	        # list/tuple so other sequence types keep their previous handling.
	        if isinstance(texts, (list, tuple)) and not texts:
	            return []

	        df = self.model.predict(texts, output_level="document")

	        # When several column names contain "embedding", the last one is
	        # used (same selection rule as before).
	        emb_cols = [c for c in df.columns if "embedding" in c]
	        if not emb_cols:
	            # Previously this surfaced as an opaque ``KeyError: None`` from
	            # ``df[None]``; keep the exception type but explain the cause.
	            raise KeyError(
	                "No embedding column found in model output; "
	                f"available columns: {list(df.columns)}"
	            )
	        # Each cell is converted with ``.tolist()``
	        # (presumably numpy arrays - TODO confirm).
	        return [vec.tolist() for vec in df[emb_cols[-1]].tolist()]

	    def embed_query(self, text: str) -> List[float]:
	        """Compute query embeddings using a JohnSnowLabs transformer model.

	        Args:
	            text: The text to embed.

	        Returns:
	            Embeddings for the text.
	        """
	        # A query is embedded as a one-element document batch.
	        return self.embed_documents([text])[0]