chaaim123 committed on
Commit
ab5c786
·
verified ·
1 Parent(s): a7e9ffc

Create utils/document_utils.py

Browse files
Files changed (1) hide show
  1. utils/document_utils.py +56 -0
utils/document_utils.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import List
3
+
4
# In-memory buffer of formatted log lines. The handler installed by
# initialize_logging() appends to it and get_logs() reads from it.
logs: List[str] = []
5
class Document:
    """Minimal container pairing a piece of text with its metadata.

    Attributes:
        metadata: Caller-supplied metadata, stored as given (no validation
            or copying is performed here).
        page_content: The document text. Other helpers in this module call
            ``str.split`` on it, so it is expected to be a string.
    """

    def __init__(self, metadata, page_content: str):
        self.metadata = metadata
        self.page_content = page_content

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(metadata={self.metadata!r}, "
            f"page_content={self.page_content!r})"
        )
9
+
10
def apply_sentence_keys_documents(relevant_docs: List["Document"]):
    """Split each document into keyed sentence fragments.

    Every document's ``page_content`` is split on ``"."`` and each fragment
    is paired with a key made of the document's position in
    ``relevant_docs`` followed by a letter for the fragment's position
    (``"0a"``, ``"0b"``, ..., ``"1a"``, ...).

    Notes:
        * Fragments are not stripped, and text ending in ``"."`` yields a
          final empty fragment, because the split is a plain ``str.split``.
        * The letter is ``chr(97 + index)``, so fragments past the 26th get
          the characters that follow ``"z"`` rather than a second letter.

    Args:
        relevant_docs: Objects exposing a string ``page_content`` attribute.

    Returns:
        One list per document, each containing ``[key, fragment]`` pairs in
        their original order.
    """
    return [
        [
            [f"{doc_index}{chr(97 + sentence_index)}", sentence]
            for sentence_index, sentence in enumerate(doc.page_content.split("."))
        ]
        for doc_index, doc in enumerate(relevant_docs)
    ]
33
+
34
def apply_sentence_keys_response(input_string: str):
    """Split a response into sentences and label each with a letter key.

    The text is split on ``". "`` (period followed by a space), so the
    separator is removed from every fragment except the last, which keeps
    any trailing period. Keys are ``chr(97 + index)``: ``"a"``, ``"b"``, ...

    Args:
        input_string: The response text to split.

    Returns:
        A list of ``[key, sentence]`` pairs in their original order. An
        empty input yields ``[["a", ""]]``.
    """
    return [
        [chr(97 + index), sentence]
        for index, sentence in enumerate(input_string.split('. '))
    ]
38
+
39
def initialize_logging() -> None:
    """Set the root logger to INFO and capture its records in ``logs``.

    A handler is attached to the root logger that formats each record as
    ``'%(asctime)s - %(message)s'`` and appends the resulting string to the
    module-level ``logs`` list.

    The function is safe to call repeatedly: the capture handler is given a
    fixed name and is only attached if no handler with that name is already
    present, so records are not duplicated in ``logs``.
    """
    handler_name = "document_utils.log_capture"

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Attaching a second capture handler would append every record twice.
    if any(existing.get_name() == handler_name for existing in logger.handlers):
        return

    # Custom log handler to capture logs and add them to the logs list
    class LogHandler(logging.Handler):
        def emit(self, record):
            logs.append(self.format(record))

    # Add custom log handler to the logger
    log_handler = LogHandler()
    log_handler.set_name(handler_name)
    log_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    logger.addHandler(log_handler)
53
+
54
def get_logs(limit: int = 100) -> str:
    """Retrieve the most recent captured log lines for display.

    Args:
        limit: Maximum number of trailing entries of ``logs`` to include.
            Defaults to 100. Values less than or equal to zero produce an
            empty string.

    Returns:
        The selected log lines joined with newline characters.
    """
    if limit <= 0:
        # logs[-0:] would select the whole list, so handle this explicitly.
        return ""
    return "\n".join(logs[-limit:])