import asyncio
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import SimpleDirectoryReader
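
# Read every file in the local RAG folder into LlamaIndex Document objects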
reader = SimpleDirectoryReader(input_dir=r"C:\Users\so7\AppData\Local\Programs\Python\Python313\RAG")
documents = reader.load_data()
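
# The ingestion pipeline below chains two transformations: a SentenceSplitter that
# chunks each document and a HuggingFace embedding model that attaches a vector to
# every chunk; arun() applies them asynchronously and returns the resulting nodes.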
# Define an async function to handle the pipeline
async def main():
    # Create the pipeline with transformations
    pipeline = IngestionPipeline(
        transformations=[
            SentenceSplitter(chunk_overlap=0),
            HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
        ]
    )
    # Use await inside the async function; note that this runs on the built-in
    # example Document rather than on the `documents` loaded above
    nodes = await pipeline.arun(documents=[Document.example()])
    # Optional: Do something with the nodes (e.g., print them)
    print(nodes)
# Run the async function using asyncio
if __name__ == "__main__":
    asyncio.run(main())
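
# Build a second ingestion pipeline that writes its embedded chunks into a
# persistent Chroma vector store, then query it through a VectorStoreIndex.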
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
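# Store embeddings in a local ChromaDB collection persisted under ./pl_db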
db = chromadb.PersistentClient(path="./pl_db")
chroma_collection = db.get_or_create_collection("ppgpl")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25, chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)
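
# Note: as written, this pipeline is only constructed. To actually populate the
# Chroma collection, the loaded documents presumably still need to be pushed
# through it, e.g. (assumption, not part of the original script):
# asyncio.run(pipeline.arun(documents=documents))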
from llama_index.core import VectorStoreIndex
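# Rebuild an index on top of the existing vector store; the same embedding model
# must be supplied so queries are embedded the same way as the stored chunks.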
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
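# The LLM is called remotely through the Hugging Face Inference API; this
# typically requires a valid Hugging Face token (e.g. the HF_TOKEN environment
# variable) and network access.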
llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
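# tree_summarize recursively summarizes the retrieved chunks into a single answer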
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
# Run a sample query (the string is Russian: "The Sun on the third step")
response = query_engine.query("Солнце на третей ступени")
print(response)