Daoneeee committed on
Commit
15e44b9
·
1 Parent(s): 3a75faa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py CHANGED
@@ -14,6 +14,51 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
19
  def get_pdf_text(pdf_docs):
@@ -52,11 +97,18 @@ def get_json_file(json_docs):
52
  temp_filepath = os.path.join(temp_dir.name, json_docs.name)
53
  with open(temp_filepath, "wb") as f:
54
  f.write(json_docs.getvalue())
 
 
 
 
 
 
55
  json_loader = JSONLoader(temp_filepath)
56
  json_doc = json_loader.load()
57
  return json_doc
58
 
59
 
 
60
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
61
  def get_text_chunks(documents):
62
  text_splitter = RecursiveCharacterTextSplitter(
 
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
16
 
17
+ {'image': {'creation_timestamp': 1675549016, 'uri': 'image_of_the_chat.jpg'},
18
+ 'is_still_participant': True,
19
+ 'joinable_mode': {'link': '', 'mode': 1},
20
+ 'magic_words': [],
21
+ 'messages': [{'content': 'Bye!',
22
+ 'sender_name': 'User 2',
23
+ 'timestamp_ms': 1675597571851},
24
+ {'content': 'Oh no worries! Bye',
25
+ 'sender_name': 'User 1',
26
+ 'timestamp_ms': 1675597435669},
27
+ {'content': 'No Im sorry it was my mistake, the blue one is not '
28
+ 'for sale',
29
+ 'sender_name': 'User 2',
30
+ 'timestamp_ms': 1675596277579},
31
+ {'content': 'I thought you were selling the blue one!',
32
+ 'sender_name': 'User 1',
33
+ 'timestamp_ms': 1675595140251},
34
+ {'content': 'Im not interested in this bag. Im interested in the '
35
+ 'blue one!',
36
+ 'sender_name': 'User 1',
37
+ 'timestamp_ms': 1675595109305},
38
+ {'content': 'Here is $129',
39
+ 'sender_name': 'User 2',
40
+ 'timestamp_ms': 1675595068468},
41
+ {'photos': [{'creation_timestamp': 1675595059,
42
+ 'uri': 'url_of_some_picture.jpg'}],
43
+ 'sender_name': 'User 2',
44
+ 'timestamp_ms': 1675595060730},
45
+ {'content': 'Online is at least $100',
46
+ 'sender_name': 'User 2',
47
+ 'timestamp_ms': 1675595045152},
48
+ {'content': 'How much do you want?',
49
+ 'sender_name': 'User 1',
50
+ 'timestamp_ms': 1675594799696},
51
+ {'content': 'Goodmorning! $50 is too low.',
52
+ 'sender_name': 'User 2',
53
+ 'timestamp_ms': 1675577876645},
54
+ {'content': 'Hi! Im interested in your bag. Im offering $50. Let '
55
+ 'me know if you are interested. Thanks!',
56
+ 'sender_name': 'User 1',
57
+ 'timestamp_ms': 1675549022673}],
58
+ 'participants': [{'name': 'User 1'}, {'name': 'User 2'}],
59
+ 'thread_path': 'inbox/User 1 and User 2 chat',
60
+ 'title': 'User 1 and User 2 chat'}
61
+
62
 
63
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
64
  def get_pdf_text(pdf_docs):
 
97
  temp_filepath = os.path.join(temp_dir.name, json_docs.name)
98
  with open(temp_filepath, "wb") as f:
99
  f.write(json_docs.getvalue())
100
+ loader = JSONLoader(
101
+ file_path='./example_data/facebook_chat.json',
102
+ jq_schema='.messages[].content',
103
+ text_content=False)
104
+
105
+ data = loader.load()
106
  json_loader = JSONLoader(temp_filepath)
107
  json_doc = json_loader.load()
108
  return json_doc
109
 
110
 
111
+
112
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
113
  def get_text_chunks(documents):
114
  text_splitter = RecursiveCharacterTextSplitter(