Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,51 +14,6 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
|
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
|
17 |
-
{'image': {'creation_timestamp': 1675549016, 'uri': 'image_of_the_chat.jpg'},
|
18 |
-
'is_still_participant': True,
|
19 |
-
'joinable_mode': {'link': '', 'mode': 1},
|
20 |
-
'magic_words': [],
|
21 |
-
'messages': [{'content': 'Bye!',
|
22 |
-
'sender_name': 'User 2',
|
23 |
-
'timestamp_ms': 1675597571851},
|
24 |
-
{'content': 'Oh no worries! Bye',
|
25 |
-
'sender_name': 'User 1',
|
26 |
-
'timestamp_ms': 1675597435669},
|
27 |
-
{'content': 'No Im sorry it was my mistake, the blue one is not '
|
28 |
-
'for sale',
|
29 |
-
'sender_name': 'User 2',
|
30 |
-
'timestamp_ms': 1675596277579},
|
31 |
-
{'content': 'I thought you were selling the blue one!',
|
32 |
-
'sender_name': 'User 1',
|
33 |
-
'timestamp_ms': 1675595140251},
|
34 |
-
{'content': 'Im not interested in this bag. Im interested in the '
|
35 |
-
'blue one!',
|
36 |
-
'sender_name': 'User 1',
|
37 |
-
'timestamp_ms': 1675595109305},
|
38 |
-
{'content': 'Here is $129',
|
39 |
-
'sender_name': 'User 2',
|
40 |
-
'timestamp_ms': 1675595068468},
|
41 |
-
{'photos': [{'creation_timestamp': 1675595059,
|
42 |
-
'uri': 'url_of_some_picture.jpg'}],
|
43 |
-
'sender_name': 'User 2',
|
44 |
-
'timestamp_ms': 1675595060730},
|
45 |
-
{'content': 'Online is at least $100',
|
46 |
-
'sender_name': 'User 2',
|
47 |
-
'timestamp_ms': 1675595045152},
|
48 |
-
{'content': 'How much do you want?',
|
49 |
-
'sender_name': 'User 1',
|
50 |
-
'timestamp_ms': 1675594799696},
|
51 |
-
{'content': 'Goodmorning! $50 is too low.',
|
52 |
-
'sender_name': 'User 2',
|
53 |
-
'timestamp_ms': 1675577876645},
|
54 |
-
{'content': 'Hi! Im interested in your bag. Im offering $50. Let '
|
55 |
-
'me know if you are interested. Thanks!',
|
56 |
-
'sender_name': 'User 1',
|
57 |
-
'timestamp_ms': 1675549022673}],
|
58 |
-
'participants': [{'name': 'User 1'}, {'name': 'User 2'}],
|
59 |
-
'thread_path': 'inbox/User 1 and User 2 chat',
|
60 |
-
'title': 'User 1 and User 2 chat'}
|
61 |
-
|
62 |
|
63 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
64 |
def get_pdf_text(pdf_docs):
|
@@ -97,18 +52,11 @@ def get_json_file(json_docs):
|
|
97 |
temp_filepath = os.path.join(temp_dir.name, json_docs.name)
|
98 |
with open(temp_filepath, "wb") as f:
|
99 |
f.write(json_docs.getvalue())
|
100 |
-
loader = JSONLoader(
|
101 |
-
file_path='./example_data/facebook_chat.json',
|
102 |
-
jq_schema='.messages[].content',
|
103 |
-
text_content=False)
|
104 |
-
|
105 |
-
data = loader.load()
|
106 |
json_loader = JSONLoader(temp_filepath)
|
107 |
json_doc = json_loader.load()
|
108 |
return json_doc
|
109 |
|
110 |
|
111 |
-
|
112 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
113 |
def get_text_chunks(documents):
|
114 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
19 |
def get_pdf_text(pdf_docs):
|
|
|
52 |
temp_filepath = os.path.join(temp_dir.name, json_docs.name)
|
53 |
with open(temp_filepath, "wb") as f:
|
54 |
f.write(json_docs.getvalue())
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
json_loader = JSONLoader(temp_filepath)
|
56 |
json_doc = json_loader.load()
|
57 |
return json_doc
|
58 |
|
59 |
|
|
|
60 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
61 |
def get_text_chunks(documents):
|
62 |
text_splitter = RecursiveCharacterTextSplitter(
|