import asyncio
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import SimpleDirectoryReader
# Load every file in the target directory into Document objects
reader = SimpleDirectoryReader(input_dir=r"C:\Users\so7\AppData\Local\Programs\Python\Python313\RAG")
documents = reader.load_data()

# Create the ingestion pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ]
)

# Define an async function to run the pipeline
async def main():
    # Use await inside the async function
    nodes = await pipeline.arun(documents=[Document.example()])
    # Optional: do something with the nodes (e.g., print them)
    print(nodes)

# Run the async function using asyncio
if __name__ == "__main__":
    asyncio.run(main())
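
# For reference: IngestionPipeline also exposes a synchronous run() with the
# same signature, so outside an event loop the call could simply be
# nodes = pipeline.run(documents=[Document.example()])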

import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Persistent Chroma client, a collection, and the vector store that wraps it
db = chromadb.PersistentClient(path="./pl_db")
chroma_collection = db.get_or_create_collection("ppgpl")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Second pipeline: same transformations, but the resulting nodes are written
# straight into the Chroma-backed vector store
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25, chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)
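
# Assumption (added for completeness, not in the original script): run the
# pipeline once so chunks and embeddings are actually written into the
# collection before it is queried below.
pipeline.run(documents=documents)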

from llama_index.core import VectorStoreIndex

# Rebuild an index on top of the existing vector store; queries must be
# embedded with the same model that was used at ingestion time
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
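
# Illustrative sketch (assumption, not part of the original script): the same
# index also supports plain retrieval without an LLM in the loop; the query
# string here is a hypothetical example.
retriever = index.as_retriever(similarity_top_k=3)
retrieved_nodes = retriever.retrieve("What does the corpus say about the sun?")
print(retrieved_nodes)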

from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Remote LLM served through the Hugging Face Inference API
llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")

query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
response = query_engine.query("The Sun on the third step")
print(response)
# Sample output: The meaning of life is 42