farmax committed on
Commit
626203f
·
verified ·
1 Parent(s): 95812af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -29,7 +29,8 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
29
  return text_splitter.split_documents(pages)
30
 
31
  def create_db(splits, collection_name):
32
- embedding = HuggingFaceEmbeddings()
 
33
  new_client = chromadb.EphemeralClient()
34
  return Chroma.from_documents(documents=splits, embedding=embedding, client=new_client, collection_name=collection_name)
35
 
@@ -112,10 +113,10 @@ def demo():
112
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Carica i tuoi documenti PDF")
113
 
114
  with gr.Tab("Passo 2 - Elabora Documenti"):
115
- db_btn = gr.Radio(["ChromaDB"], label="Tipo di database vettoriale", value="ChromaDB", type="index")
116
  with gr.Accordion("Opzioni Avanzate - Divisione del testo del documento", open=False):
117
- slider_chunk_size = gr.Slider(100, 1000, 1000, step=20, label="Dimensione del chunk")
118
- slider_chunk_overlap = gr.Slider(10, 200, 100, step=10, label="Sovrapposizione del chunk")
119
  db_progress = gr.Textbox(label="Inizializzazione del database vettoriale", value="Nessuna")
120
  db_btn = gr.Button("Genera database vettoriale")
121
 
 
29
  return text_splitter.split_documents(pages)
30
 
31
  def create_db(splits, collection_name):
32
+ # Use the lightweight MiniLM model for embeddings
33
+ embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
34
  new_client = chromadb.EphemeralClient()
35
  return Chroma.from_documents(documents=splits, embedding=embedding, client=new_client, collection_name=collection_name)
36
 
 
113
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Carica i tuoi documenti PDF")
114
 
115
  with gr.Tab("Passo 2 - Elabora Documenti"):
116
+ db_btn = gr.Radio(["ChromaDB (MiniLM Embedding)"], label="Tipo di database vettoriale", value="ChromaDB (MiniLM Embedding)", type="index")
117
  with gr.Accordion("Opzioni Avanzate - Divisione del testo del documento", open=False):
118
+ slider_chunk_size = gr.Slider(100, 1000, 500, step=20, label="Dimensione del chunk")
119
+ slider_chunk_overlap = gr.Slider(10, 200, 50, step=10, label="Sovrapposizione del chunk")
120
  db_progress = gr.Textbox(label="Inizializzazione del database vettoriale", value="Nessuna")
121
  db_btn = gr.Button("Genera database vettoriale")
122