Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,51 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
|
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
19 |
def get_pdf_text(pdf_docs):
|
@@ -52,11 +97,18 @@ def get_json_file(json_docs):
|
|
52 |
temp_filepath = os.path.join(temp_dir.name, json_docs.name)
|
53 |
with open(temp_filepath, "wb") as f:
|
54 |
f.write(json_docs.getvalue())
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
json_loader = JSONLoader(temp_filepath)
|
56 |
json_doc = json_loader.load()
|
57 |
return json_doc
|
58 |
|
59 |
|
|
|
60 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
61 |
def get_text_chunks(documents):
|
62 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
|
17 |
+
{'image': {'creation_timestamp': 1675549016, 'uri': 'image_of_the_chat.jpg'},
|
18 |
+
'is_still_participant': True,
|
19 |
+
'joinable_mode': {'link': '', 'mode': 1},
|
20 |
+
'magic_words': [],
|
21 |
+
'messages': [{'content': 'Bye!',
|
22 |
+
'sender_name': 'User 2',
|
23 |
+
'timestamp_ms': 1675597571851},
|
24 |
+
{'content': 'Oh no worries! Bye',
|
25 |
+
'sender_name': 'User 1',
|
26 |
+
'timestamp_ms': 1675597435669},
|
27 |
+
{'content': 'No Im sorry it was my mistake, the blue one is not '
|
28 |
+
'for sale',
|
29 |
+
'sender_name': 'User 2',
|
30 |
+
'timestamp_ms': 1675596277579},
|
31 |
+
{'content': 'I thought you were selling the blue one!',
|
32 |
+
'sender_name': 'User 1',
|
33 |
+
'timestamp_ms': 1675595140251},
|
34 |
+
{'content': 'Im not interested in this bag. Im interested in the '
|
35 |
+
'blue one!',
|
36 |
+
'sender_name': 'User 1',
|
37 |
+
'timestamp_ms': 1675595109305},
|
38 |
+
{'content': 'Here is $129',
|
39 |
+
'sender_name': 'User 2',
|
40 |
+
'timestamp_ms': 1675595068468},
|
41 |
+
{'photos': [{'creation_timestamp': 1675595059,
|
42 |
+
'uri': 'url_of_some_picture.jpg'}],
|
43 |
+
'sender_name': 'User 2',
|
44 |
+
'timestamp_ms': 1675595060730},
|
45 |
+
{'content': 'Online is at least $100',
|
46 |
+
'sender_name': 'User 2',
|
47 |
+
'timestamp_ms': 1675595045152},
|
48 |
+
{'content': 'How much do you want?',
|
49 |
+
'sender_name': 'User 1',
|
50 |
+
'timestamp_ms': 1675594799696},
|
51 |
+
{'content': 'Goodmorning! $50 is too low.',
|
52 |
+
'sender_name': 'User 2',
|
53 |
+
'timestamp_ms': 1675577876645},
|
54 |
+
{'content': 'Hi! Im interested in your bag. Im offering $50. Let '
|
55 |
+
'me know if you are interested. Thanks!',
|
56 |
+
'sender_name': 'User 1',
|
57 |
+
'timestamp_ms': 1675549022673}],
|
58 |
+
'participants': [{'name': 'User 1'}, {'name': 'User 2'}],
|
59 |
+
'thread_path': 'inbox/User 1 and User 2 chat',
|
60 |
+
'title': 'User 1 and User 2 chat'}
|
61 |
+
|
62 |
|
63 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
64 |
def get_pdf_text(pdf_docs):
|
|
|
97 |
temp_filepath = os.path.join(temp_dir.name, json_docs.name)
|
98 |
with open(temp_filepath, "wb") as f:
|
99 |
f.write(json_docs.getvalue())
|
100 |
+
loader = JSONLoader(
|
101 |
+
file_path='./example_data/facebook_chat.json',
|
102 |
+
jq_schema='.messages[].content',
|
103 |
+
text_content=False)
|
104 |
+
|
105 |
+
data = loader.load()
|
106 |
json_loader = JSONLoader(temp_filepath)
|
107 |
json_doc = json_loader.load()
|
108 |
return json_doc
|
109 |
|
110 |
|
111 |
+
|
112 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
113 |
def get_text_chunks(documents):
|
114 |
text_splitter = RecursiveCharacterTextSplitter(
|