gavinzli committed on
Commit
0c62c4d
·
1 Parent(s): af08824

Enhance GitHub Actions workflow for Hugging Face sync; add Git LFS support and improve email MIME type handling in mail.py

Browse files
.github/workflows/main.yml CHANGED
@@ -1,20 +1,35 @@
1
  name: Sync to Hugging Face hub
 
2
  on:
3
  push:
4
  branches: [main]
5
 
6
- # to run this workflow manually from the Actions tab
7
  workflow_dispatch:
8
 
9
  jobs:
10
  sync-to-hub:
11
  runs-on: ubuntu-latest
12
  steps:
 
13
  - uses: actions/checkout@v3
14
  with:
15
  fetch-depth: 0
16
- # lfs: true
 
 
 
 
 
 
 
 
 
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: git push https://OxbridegeEcon:$HF_TOKEN@huggingface.co/spaces/Oxbridge-Economics/Mailbox main --force
 
 
 
 
 
1
  name: Sync to Hugging Face hub
2
+
3
  on:
4
  push:
5
  branches: [main]
6
 
7
+ # To run this workflow manually from the Actions tab
8
  workflow_dispatch:
9
 
10
  jobs:
11
  sync-to-hub:
12
  runs-on: ubuntu-latest
13
  steps:
14
+ # Checkout the repository with Git LFS support
15
  - uses: actions/checkout@v3
16
  with:
17
  fetch-depth: 0
18
+ lfs: true # Enable Git LFS support
19
+
20
+ # Install Git LFS to handle large files
21
+ - name: Install Git LFS
22
+ run: |
23
+ sudo apt-get update
24
+ sudo apt-get install git-lfs
25
+ git lfs install
26
+
27
+ # Push the files to the Hugging Face Hub
28
  - name: Push to hub
29
  env:
30
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
31
+ run: |
32
+ git lfs track "*.sqlite3" # Replace with the extension(s) of your large files, e.g., "*.bin"
33
+ git add .
34
+ git commit -m "Add large files"
35
+ git push https://OxbridegeEcon:$HF_TOKEN@huggingface.co/spaces/Oxbridge-Economics/Mailbox main --force
app/controllers/mail.py CHANGED
@@ -96,24 +96,24 @@ def list_emails(messages):
96
  print(metadata, msg["payload"]["mimeType"])
97
  ids = []
98
  documents = []
99
- mimeType = []
100
  if msg["payload"]["mimeType"] in [
101
  "multipart/alternative",
102
  "multipart/related",
103
  "multipart/mixed",
104
  ]:
105
- mimeType = []
106
  attach_docs = []
107
  for part in msg["payload"]["parts"]:
108
  print("mimeType: ", part["mimeType"])
109
- mimeType.append(part["mimeType"])
110
- if part["mimeType"] == "text/plain" and "text/html" not in mimeType:
111
  body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
112
  body = re.sub(r"<[^>]+>", "", body) # Remove HTML tags
113
  metadata["mimeType"] = part["mimeType"]
114
  documents.append(Document(page_content=body, metadata=metadata))
115
  ids.append(msg["id"])
116
- elif part["mimeType"] == "text/html" and "text/plain" not in mimeType:
117
  body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
118
  body = re.sub(r"<[^>]+>", "", body)
119
  metadata["mimeType"] = part["mimeType"]
@@ -192,7 +192,7 @@ def list_emails(messages):
192
  metadata["mimeType"] = msg["payload"]["mimeType"]
193
  documents.append(Document(page_content=body, metadata=metadata))
194
  ids.append(msg["id"])
195
- if "multipart/alternative" in mimeType and len(mimeType) == 1:
196
  print("Only multipart/alternative found in the email.")
197
  else:
198
  vectorstore.add_documents(documents=documents, ids=ids)
 
96
  print(metadata, msg["payload"]["mimeType"])
97
  ids = []
98
  documents = []
99
+ mime_types = []
100
  if msg["payload"]["mimeType"] in [
101
  "multipart/alternative",
102
  "multipart/related",
103
  "multipart/mixed",
104
  ]:
105
+ mime_types = []
106
  attach_docs = []
107
  for part in msg["payload"]["parts"]:
108
  print("mimeType: ", part["mimeType"])
109
+ mime_types.append(part["mimeType"])
110
+ if part["mimeType"] == "text/plain" and "text/html" not in mime_types:
111
  body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
112
  body = re.sub(r"<[^>]+>", "", body) # Remove HTML tags
113
  metadata["mimeType"] = part["mimeType"]
114
  documents.append(Document(page_content=body, metadata=metadata))
115
  ids.append(msg["id"])
116
+ elif part["mimeType"] == "text/html" and "text/plain" not in mime_types:
117
  body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
118
  body = re.sub(r"<[^>]+>", "", body)
119
  metadata["mimeType"] = part["mimeType"]
 
192
  metadata["mimeType"] = msg["payload"]["mimeType"]
193
  documents.append(Document(page_content=body, metadata=metadata))
194
  ids.append(msg["id"])
195
+ if "multipart/alternative" in mime_types and len(mime_types) == 1:
196
  print("Only multipart/alternative found in the email.")
197
  else:
198
  vectorstore.add_documents(documents=documents, ids=ids)