Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -29,7 +29,8 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
|
|
29 |
return text_splitter.split_documents(pages)
|
30 |
|
31 |
def create_db(splits, collection_name):
|
32 |
-
|
|
|
33 |
new_client = chromadb.EphemeralClient()
|
34 |
return Chroma.from_documents(documents=splits, embedding=embedding, client=new_client, collection_name=collection_name)
|
35 |
|
@@ -112,10 +113,10 @@ def demo():
|
|
112 |
document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Carica i tuoi documenti PDF")
|
113 |
|
114 |
with gr.Tab("Passo 2 - Elabora Documenti"):
|
115 |
-
db_btn = gr.Radio(["ChromaDB"], label="Tipo di database vettoriale", value="ChromaDB", type="index")
|
116 |
with gr.Accordion("Opzioni Avanzate - Divisione del testo del documento", open=False):
|
117 |
-
slider_chunk_size = gr.Slider(100, 1000,
|
118 |
-
slider_chunk_overlap = gr.Slider(10, 200,
|
119 |
db_progress = gr.Textbox(label="Inizializzazione del database vettoriale", value="Nessuna")
|
120 |
db_btn = gr.Button("Genera database vettoriale")
|
121 |
|
|
|
29 |
return text_splitter.split_documents(pages)
|
30 |
|
31 |
def create_db(splits, collection_name):
|
32 |
+
# Use the lightweight MiniLM model for embeddings
|
33 |
+
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
34 |
new_client = chromadb.EphemeralClient()
|
35 |
return Chroma.from_documents(documents=splits, embedding=embedding, client=new_client, collection_name=collection_name)
|
36 |
|
|
|
113 |
document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Carica i tuoi documenti PDF")
|
114 |
|
115 |
with gr.Tab("Passo 2 - Elabora Documenti"):
|
116 |
+
db_btn = gr.Radio(["ChromaDB (MiniLM Embedding)"], label="Tipo di database vettoriale", value="ChromaDB (MiniLM Embedding)", type="index")
|
117 |
with gr.Accordion("Opzioni Avanzate - Divisione del testo del documento", open=False):
|
118 |
+
slider_chunk_size = gr.Slider(100, 1000, 500, step=20, label="Dimensione del chunk")
|
119 |
+
slider_chunk_overlap = gr.Slider(10, 200, 50, step=10, label="Sovrapposizione del chunk")
|
120 |
db_progress = gr.Textbox(label="Inizializzazione del database vettoriale", value="Nessuna")
|
121 |
db_btn = gr.Button("Genera database vettoriale")
|
122 |
|