import asyncio
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import SimpleDirectoryReader
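
# Read every file in the local RAG folder into LlamaIndex Document objects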
reader = SimpleDirectoryReader(input_dir=r"C:\Users\so7\AppData\Local\Programs\Python\Python313\RAG")
documents = reader.load_data()
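
# The ingestion pipeline below chains two transformations: a SentenceSplitter that
# chunks each document and a HuggingFace embedding model that attaches a vector to
# every chunk; arun() applies them asynchronously and returns the resulting nodes.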
# Define an async function to handle the pipeline
async def main():
    # Create the pipeline with transformations
    pipeline = IngestionPipeline(
        transformations=[
            SentenceSplitter(chunk_overlap=0),
            HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
        ]
    )
    # Use await inside the async function; note that this runs on the built-in
    # example Document rather than on the `documents` loaded above
    nodes = await pipeline.arun(documents=[Document.example()])
    # Optional: Do something with the nodes (e.g., print them)
    print(nodes)
# Run the async function using asyncio
if __name__ == "__main__":
    asyncio.run(main())
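
# Build a second ingestion pipeline that writes its embedded chunks into a
# persistent Chroma vector store, then query it through a VectorStoreIndex.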
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
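# Store embeddings in a local ChromaDB collection persisted under ./pl_db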
db = chromadb.PersistentClient(path="./pl_db")
chroma_collection = db.get_or_create_collection("ppgpl")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25, chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)
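
# Note: as written, this pipeline is only constructed. To actually populate the
# Chroma collection, the loaded documents presumably still need to be pushed
# through it, e.g. (assumption, not part of the original script):
# asyncio.run(pipeline.arun(documents=documents))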
from llama_index.core import VectorStoreIndex
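# Rebuild an index on top of the existing vector store; the same embedding model
# must be supplied so queries are embedded the same way as the stored chunks.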
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
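# The LLM is called remotely through the Hugging Face Inference API; this
# typically requires a valid Hugging Face token (e.g. the HF_TOKEN environment
# variable) and network access.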
llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
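# tree_summarize recursively summarizes the retrieved chunks into a single answer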
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
# Run a sample query (the string is Russian: "The Sun on the third step")
response = query_engine.query("Солнце на третей ступени")
print(response)