Spaces:
Running
Running
Enhance GitHub Actions workflow for Hugging Face sync; add Git LFS support and improve email MIME type handling in mail.py
Browse files
- .github/workflows/main.yml +18 -3
- app/controllers/mail.py +6 -6
.github/workflows/main.yml
CHANGED
@@ -1,20 +1,35 @@
|
|
1 |
name: Sync to Hugging Face hub
|
|
|
2 |
on:
|
3 |
push:
|
4 |
branches: [main]
|
5 |
|
6 |
-
#
|
7 |
workflow_dispatch:
|
8 |
|
9 |
jobs:
|
10 |
sync-to-hub:
|
11 |
runs-on: ubuntu-latest
|
12 |
steps:
|
|
|
13 |
- uses: actions/checkout@v3
|
14 |
with:
|
15 |
fetch-depth: 0
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
- name: Push to hub
|
18 |
env:
|
19 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
20 |
-
run:
|
|
|
|
|
|
|
|
|
|
1 |
name: Sync to Hugging Face hub
|
2 |
+
|
3 |
on:
|
4 |
push:
|
5 |
branches: [main]
|
6 |
|
7 |
+
# To run this workflow manually from the Actions tab
|
8 |
workflow_dispatch:
|
9 |
|
10 |
jobs:
|
11 |
sync-to-hub:
|
12 |
runs-on: ubuntu-latest
|
13 |
steps:
|
14 |
+
# Checkout the repository with Git LFS support
|
15 |
- uses: actions/checkout@v3
|
16 |
with:
|
17 |
fetch-depth: 0
|
18 |
+
lfs: true # Enable Git LFS support
|
19 |
+
|
20 |
+
# Install Git LFS to handle large files
|
21 |
+
- name: Install Git LFS
|
22 |
+
run: |
|
23 |
+
sudo apt-get update
|
24 |
+
sudo apt-get install git-lfs
|
25 |
+
git lfs install
|
26 |
+
|
27 |
+
# Push the files to the Hugging Face Hub
|
28 |
- name: Push to hub
|
29 |
env:
|
30 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
31 |
+
run: |
|
32 |
+
git lfs track "*.sqlite3" # Replace with the extension(s) of your large files, e.g., "*.bin"
|
33 |
+
git add .
|
34 |
+
git commit -m "Add large files"
|
35 |
+
git push https://OxbridegeEcon:$HF_TOKEN@huggingface.co/spaces/Oxbridge-Economics/Mailbox main --force
|
app/controllers/mail.py
CHANGED
@@ -96,24 +96,24 @@ def list_emails(messages):
|
|
96 |
print(metadata, msg["payload"]["mimeType"])
|
97 |
ids = []
|
98 |
documents = []
|
99 |
-
|
100 |
if msg["payload"]["mimeType"] in [
|
101 |
"multipart/alternative",
|
102 |
"multipart/related",
|
103 |
"multipart/mixed",
|
104 |
]:
|
105 |
-
|
106 |
attach_docs = []
|
107 |
for part in msg["payload"]["parts"]:
|
108 |
print("mimeType: ", part["mimeType"])
|
109 |
-
|
110 |
-
if part["mimeType"] == "text/plain" and "text/html" not in
|
111 |
body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
|
112 |
body = re.sub(r"<[^>]+>", "", body) # Remove HTML tags
|
113 |
metadata["mimeType"] = part["mimeType"]
|
114 |
documents.append(Document(page_content=body, metadata=metadata))
|
115 |
ids.append(msg["id"])
|
116 |
-
elif part["mimeType"] == "text/html" and "text/plain" not in
|
117 |
body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
|
118 |
body = re.sub(r"<[^>]+>", "", body)
|
119 |
metadata["mimeType"] = part["mimeType"]
|
@@ -192,7 +192,7 @@ def list_emails(messages):
|
|
192 |
metadata["mimeType"] = msg["payload"]["mimeType"]
|
193 |
documents.append(Document(page_content=body, metadata=metadata))
|
194 |
ids.append(msg["id"])
|
195 |
-
if "multipart/alternative" in
|
196 |
print("Only multipart/alternative found in the email.")
|
197 |
else:
|
198 |
vectorstore.add_documents(documents=documents, ids=ids)
|
|
|
96 |
print(metadata, msg["payload"]["mimeType"])
|
97 |
ids = []
|
98 |
documents = []
|
99 |
+
mime_types = []
|
100 |
if msg["payload"]["mimeType"] in [
|
101 |
"multipart/alternative",
|
102 |
"multipart/related",
|
103 |
"multipart/mixed",
|
104 |
]:
|
105 |
+
mime_types = []
|
106 |
attach_docs = []
|
107 |
for part in msg["payload"]["parts"]:
|
108 |
print("mimeType: ", part["mimeType"])
|
109 |
+
mime_types.append(part["mimeType"])
|
110 |
+
if part["mimeType"] == "text/plain" and "text/html" not in mime_types:
|
111 |
body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
|
112 |
body = re.sub(r"<[^>]+>", "", body) # Remove HTML tags
|
113 |
metadata["mimeType"] = part["mimeType"]
|
114 |
documents.append(Document(page_content=body, metadata=metadata))
|
115 |
ids.append(msg["id"])
|
116 |
+
elif part["mimeType"] == "text/html" and "text/plain" not in mime_types:
|
117 |
body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")
|
118 |
body = re.sub(r"<[^>]+>", "", body)
|
119 |
metadata["mimeType"] = part["mimeType"]
|
|
|
192 |
metadata["mimeType"] = msg["payload"]["mimeType"]
|
193 |
documents.append(Document(page_content=body, metadata=metadata))
|
194 |
ids.append(msg["id"])
|
195 |
+
if "multipart/alternative" in mime_types and len(mime_types) == 1:
|
196 |
print("Only multipart/alternative found in the email.")
|
197 |
else:
|
198 |
vectorstore.add_documents(documents=documents, ids=ids)
|