farmax committed on
Commit
a8cd24e
·
verified ·
1 Parent(s): 93d7cc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -14
app.py CHANGED
@@ -34,6 +34,8 @@ list_llm = ["google/gemma-7b-it", "mistralai/Mistral-7B-Instruct-v0.2"]
34
  def initialize_database(document, chunk_size, chunk_overlap, progress=gr.Progress()):
35
  logger.info("Initializing database...")
36
  documents = []
 
 
37
  for file in document:
38
  try:
39
  loader = UnstructuredPDFLoader(file.name)
@@ -47,11 +49,10 @@ def initialize_database(document, chunk_size, chunk_overlap, progress=gr.Progres
47
  logger.error("Impossibile caricare il documento PDF. Assicurati di aver installato 'unstructured' o 'pypdf'.")
48
  return None, "Errore: Pacchetti necessari non installati. Esegui 'pip install unstructured pypdf' e riprova."
49
 
50
- splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
51
  for doc in docs:
52
- pages = splitter.split_document(doc)
53
- for page in pages:
54
- documents.append(Document(page_content=page.page_content, metadata={"filename": file.name}))
55
 
56
  if not documents:
57
  return None, "Errore: Nessun documento caricato correttamente."
@@ -60,6 +61,7 @@ def initialize_database(document, chunk_size, chunk_overlap, progress=gr.Progres
60
  progress.update(0.5)
61
  logger.info("Database initialized successfully.")
62
  return vectorstore, "Initialized"
 
63
 
64
  def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress(), language="italian"):
65
  logger.info("Initializing LLM chain...")
@@ -129,16 +131,6 @@ def conversation(qa_chain, message, history, language):
129
 
130
  def demo():
131
  with gr.Blocks(theme="base") as demo:
132
- gr.Markdown(
133
- """
134
- ## Importante: Installazione dei pacchetti necessari
135
- Prima di utilizzare questa applicazione, assicurati di aver installato i seguenti pacchetti:
136
- ```
137
- pip install unstructured pypdf
138
- ```
139
- Questi pacchetti sono necessari per il corretto funzionamento del caricamento dei documenti PDF.
140
- """
141
- )
142
 
143
  vector_db = gr.State()
144
  qa_chain = gr.State()
 
34
  def initialize_database(document, chunk_size, chunk_overlap, progress=gr.Progress()):
35
  logger.info("Initializing database...")
36
  documents = []
37
+ splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
38
+
39
  for file in document:
40
  try:
41
  loader = UnstructuredPDFLoader(file.name)
 
49
  logger.error("Impossibile caricare il documento PDF. Assicurati di aver installato 'unstructured' o 'pypdf'.")
50
  return None, "Errore: Pacchetti necessari non installati. Esegui 'pip install unstructured pypdf' e riprova."
51
 
 
52
  for doc in docs:
53
+ text_chunks = splitter.split_text(doc.page_content)
54
+ for chunk in text_chunks:
55
+ documents.append(Document(page_content=chunk, metadata={"filename": file.name, "page": doc.metadata.get("page", 0)}))
56
 
57
  if not documents:
58
  return None, "Errore: Nessun documento caricato correttamente."
 
61
  progress.update(0.5)
62
  logger.info("Database initialized successfully.")
63
  return vectorstore, "Initialized"
64
+
65
 
66
  def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress(), language="italian"):
67
  logger.info("Initializing LLM chain...")
 
131
 
132
  def demo():
133
  with gr.Blocks(theme="base") as demo:
 
 
 
 
 
 
 
 
 
 
134
 
135
  vector_db = gr.State()
136
  qa_chain = gr.State()