Spaces:
Running
Running
File size: 10,806 Bytes
ef10e9f 9455411 ebc10b6 ef10e9f cb7341f ef10e9f 9455411 ef10e9f 9455411 ef10e9f 9455411 ef10e9f 9455411 ef10e9f 9455411 ef10e9f 9455411 ef10e9f 9455411 ef10e9f fc0e67e 9455411 ef10e9f 9455411 fc807c3 284474f 9455411 ef10e9f 9455411 ef10e9f 9455411 ef10e9f cb7341f ef10e9f 9455411 ef10e9f 9455411 ef10e9f ebc10b6 ef10e9f ebc10b6 ef10e9f e68fb0a ef10e9f 9910188 cb7341f ef10e9f 9455411 9910188 ef10e9f 9910188 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 |
from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail, Email, To, Content
import argparse
import yaml
import os
from dotenv import load_dotenv
import openai
from relevancy import generate_relevance_score, process_subject_fields
from download_new_papers import get_papers
from datetime import date
# Hackathon quality code. Don't judge too harshly.
# Feel free to submit pull requests to improve the code.
# Top-level arXiv subject areas, keyed by display name. The value is the
# abbreviation that generate_body() passes to get_papers().
# "Physics" maps to an empty string: generate_body() rejects that name and
# requires one of the physics_topics names instead.
topics = {
    "Physics": "",
    "Mathematics": "math",
    "Computer Science": "cs",
    "Quantitative Biology": "q-bio",
    "Quantitative Finance": "q-fin",
    "Statistics": "stat",
    "Electrical Engineering and Systems Science": "eess",
    "Economics": "econ",
}
# Physics subtopics, keyed by display name. The value is the abbreviation
# that generate_body() passes to get_papers(). generate_body() consults this
# table before `topics`.
# NOTE(review): generate_body() rejects the literal topic name "Physics"
# before looking it up here, so the "Physics" -> "physics" entry below looks
# unreachable through that function -- confirm whether that is intended.
physics_topics = {
    "Astrophysics": "astro-ph",
    "Condensed Matter": "cond-mat",
    "General Relativity and Quantum Cosmology": "gr-qc",
    "High Energy Physics - Experiment": "hep-ex",
    "High Energy Physics - Lattice": "hep-lat",
    "High Energy Physics - Phenomenology": "hep-ph",
    "High Energy Physics - Theory": "hep-th",
    "Mathematical Physics": "math-ph",
    "Nonlinear Sciences": "nlin",
    "Nuclear Experiment": "nucl-ex",
    "Nuclear Theory": "nucl-th",
    "Physics": "physics",
    "Quantum Physics": "quant-ph",
}
# TODO: surely there's a better way
# Valid category names for each topic. Keys are the display names used in
# `topics` and `physics_topics`; generate_body() raises if a requested
# category is not in the list for the chosen topic.
# The one-element list ["None"] contains the *string* "None", not the None
# object -- presumably it marks topics without subcategories (TODO confirm).
category_map = {
    "Astrophysics": [
        "Astrophysics of Galaxies",
        "Cosmology and Nongalactic Astrophysics",
        "Earth and Planetary Astrophysics",
        "High Energy Astrophysical Phenomena",
        "Instrumentation and Methods for Astrophysics",
        "Solar and Stellar Astrophysics",
    ],
    "Condensed Matter": [
        "Disordered Systems and Neural Networks",
        "Materials Science",
        "Mesoscale and Nanoscale Physics",
        "Other Condensed Matter",
        "Quantum Gases",
        "Soft Condensed Matter",
        "Statistical Mechanics",
        "Strongly Correlated Electrons",
        "Superconductivity",
    ],
    "General Relativity and Quantum Cosmology": ["None"],
    "High Energy Physics - Experiment": ["None"],
    "High Energy Physics - Lattice": ["None"],
    "High Energy Physics - Phenomenology": ["None"],
    "High Energy Physics - Theory": ["None"],
    "Mathematical Physics": ["None"],
    "Nonlinear Sciences": [
        "Adaptation and Self-Organizing Systems",
        "Cellular Automata and Lattice Gases",
        "Chaotic Dynamics",
        "Exactly Solvable and Integrable Systems",
        "Pattern Formation and Solitons",
    ],
    "Nuclear Experiment": ["None"],
    "Nuclear Theory": ["None"],
    "Physics": [
        "Accelerator Physics",
        "Applied Physics",
        "Atmospheric and Oceanic Physics",
        "Atomic and Molecular Clusters",
        "Atomic Physics",
        "Biological Physics",
        "Chemical Physics",
        "Classical Physics",
        "Computational Physics",
        "Data Analysis, Statistics and Probability",
        "Fluid Dynamics",
        "General Physics",
        "Geophysics",
        "History and Philosophy of Physics",
        "Instrumentation and Detectors",
        "Medical Physics",
        "Optics",
        "Physics and Society",
        "Physics Education",
        "Plasma Physics",
        "Popular Physics",
        "Space Physics",
    ],
    "Quantum Physics": ["None"],
    "Mathematics": [
        "Algebraic Geometry",
        "Algebraic Topology",
        "Analysis of PDEs",
        "Category Theory",
        "Classical Analysis and ODEs",
        "Combinatorics",
        "Commutative Algebra",
        "Complex Variables",
        "Differential Geometry",
        "Dynamical Systems",
        "Functional Analysis",
        "General Mathematics",
        "General Topology",
        "Geometric Topology",
        "Group Theory",
        "History and Overview",
        "Information Theory",
        "K-Theory and Homology",
        "Logic",
        "Mathematical Physics",
        "Metric Geometry",
        "Number Theory",
        "Numerical Analysis",
        "Operator Algebras",
        "Optimization and Control",
        "Probability",
        "Quantum Algebra",
        "Representation Theory",
        "Rings and Algebras",
        "Spectral Theory",
        "Statistics Theory",
        "Symplectic Geometry",
    ],
    "Computer Science": [
        "Artificial Intelligence",
        "Computation and Language",
        "Computational Complexity",
        "Computational Engineering, Finance, and Science",
        "Computational Geometry",
        "Computer Science and Game Theory",
        "Computer Vision and Pattern Recognition",
        "Computers and Society",
        "Cryptography and Security",
        "Data Structures and Algorithms",
        "Databases",
        "Digital Libraries",
        "Discrete Mathematics",
        "Distributed, Parallel, and Cluster Computing",
        "Emerging Technologies",
        "Formal Languages and Automata Theory",
        "General Literature",
        "Graphics",
        "Hardware Architecture",
        "Human-Computer Interaction",
        "Information Retrieval",
        "Information Theory",
        "Logic in Computer Science",
        "Machine Learning",
        "Mathematical Software",
        "Multiagent Systems",
        "Multimedia",
        "Networking and Internet Architecture",
        "Neural and Evolutionary Computing",
        "Numerical Analysis",
        "Operating Systems",
        "Other Computer Science",
        "Performance",
        "Programming Languages",
        "Robotics",
        "Social and Information Networks",
        "Software Engineering",
        "Sound",
        "Symbolic Computation",
        "Systems and Control",
    ],
    "Quantitative Biology": [
        "Biomolecules",
        "Cell Behavior",
        "Genomics",
        "Molecular Networks",
        "Neurons and Cognition",
        "Other Quantitative Biology",
        "Populations and Evolution",
        "Quantitative Methods",
        "Subcellular Processes",
        "Tissues and Organs",
    ],
    "Quantitative Finance": [
        "Computational Finance",
        "Economics",
        "General Finance",
        "Mathematical Finance",
        "Portfolio Management",
        "Pricing of Securities",
        "Risk Management",
        "Statistical Finance",
        "Trading and Market Microstructure",
    ],
    "Statistics": [
        "Applications",
        "Computation",
        "Machine Learning",
        "Methodology",
        "Other Statistics",
        "Statistics Theory",
    ],
    "Electrical Engineering and Systems Science": [
        "Audio and Speech Processing",
        "Image and Video Processing",
        "Signal Processing",
        "Systems and Control",
    ],
    "Economics": ["Econometrics", "General Economics", "Theoretical Economics"],
}
def generate_body(topic, categories, interest, threshold):
    """Build the HTML body of the digest for one arXiv topic.

    Args:
        topic: Display name of the topic; a key of ``physics_topics`` or of
            ``topics``. The umbrella name "Physics" is rejected up front.
        categories: Category names to keep. Each must appear in
            ``category_map[topic]``. When falsy, papers are not filtered.
        interest: Interest description forwarded to
            ``generate_relevance_score``. When falsy, the papers are listed
            with title and authors only and no scoring is requested.
        threshold: Forwarded to ``generate_relevance_score`` as
            ``threshold_score``.

    Returns:
        An HTML fragment (str) with one entry per paper, entries separated
        by ``<br><br>``.

    Raises:
        RuntimeError: If ``topic`` is "Physics", is not a known topic, or if
            any entry of ``categories`` is not a category of ``topic``.
    """
    # NOTE(review): this check runs first, so the "Physics" entry of
    # physics_topics can never be selected -- confirm that is intended.
    if topic == "Physics":
        raise RuntimeError("You must choose a physics subtopic.")
    elif topic in physics_topics:
        abbr = physics_topics[topic]
    elif topic in topics:
        abbr = topics[topic]
    else:
        raise RuntimeError(f"Invalid topic {topic}")

    # Validate every requested category before fetching anything.
    if categories:
        valid_categories = category_map[topic]
        for category in categories:
            if category not in valid_categories:
                raise RuntimeError(f"{category} is not a category of {topic}")

    # Fetch once; the category filter (if any) is applied afterwards.
    papers = get_papers(abbr)
    if categories:
        # Build the lookup set a single time instead of once per paper.
        wanted = set(categories)
        papers = [
            paper
            for paper in papers
            if wanted & set(process_subject_fields(paper["subjects"]))
        ]

    if interest:
        relevancy, hallucination = generate_relevance_score(
            papers,
            query={"interest": interest},
            threshold_score=threshold,
            num_paper_in_prompt=2,
        )
        body = "<br><br>".join(
            f'<b>Subject: </b>{paper["subjects"]}<br><b>Title:</b> <a href="{paper["main_page"]}">{paper["title"]}</a><br><b>Authors:</b> {paper["authors"]}<br>'
            f'<b>Score:</b> {paper["Relevancy score"]}<br><b>Reason:</b> {paper["Reasons for match"]}<br>'
            f'<b>Goal:</b> {paper["Goal"]}<br><b>Data</b>: {paper["Data"]}<br><b>Methodology:</b> {paper["Methodology"]}<br>'
            f'<b>Experiments & Results</b>: {paper["Experiments & Results"]}<br><b>Git</b>: {paper["Git"]}<br>'
            f'<b>Discussion & Next steps</b>: {paper["Discussion & Next steps"]}'
            for paper in relevancy
        )
        if hallucination:
            # Prepend the warning so it is the first thing the reader sees.
            body = (
                "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>"
                + body
            )
    else:
        body = "<br><br>".join(
            f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}'
            for paper in papers
        )
    return body
def get_date():
    """Return today's local date formatted as ``DDMMYYYY`` (no separators)."""
    return f"{date.today():%d%m%Y}"
if __name__ == "__main__":
    # Script entry point: read the config, build the digest, save it as an
    # HTML file and, when a SendGrid key is configured, e-mail it.

    # Load the .env file.
    load_dotenv()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config", help="yaml config file to use", default="config.yaml"
    )
    args = parser.parse_args()
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)
    if "OPENAI_API_KEY" not in os.environ:
        raise RuntimeError("No openai api key found")
    openai.api_key = os.environ.get("OPENAI_API_KEY")

    topic = config["topic"]
    categories = config["categories"]
    from_email = os.environ.get("FROM_EMAIL")
    to_email = os.environ.get("TO_EMAIL")
    threshold = config["threshold"]
    interest = config["interest"]
    body = generate_body(topic, categories, interest, threshold)

    today_date = get_date()
    # Write as UTF-8 explicitly: titles and author names may contain
    # characters the platform's default encoding cannot represent.
    with open(f"digest_{today_date}.html", "w", encoding="utf-8") as f:
        f.write(body)

    sendgrid_api_key = os.environ.get("SENDGRID_API_KEY")
    if sendgrid_api_key:
        sg = SendGridAPIClient(api_key=sendgrid_api_key)
        from_email = Email(from_email)  # Change to your verified sender
        to_email = To(to_email)
        subject = date.today().strftime("Personalized arXiv Digest, %d %b %Y")
        content = Content("text/html", body)
        mail = Mail(from_email, to_email, subject, content)
        mail_json = mail.get()

        # Send an HTTP POST request to /mail/send
        response = sg.client.mail.send.post(request_body=mail_json)
        # Only 2xx codes are success; 300 is a redirect, not a success.
        if 200 <= response.status_code < 300:
            print("Send test email: Success!")
        else:
            # The client's Response exposes the payload as `.body`.
            print(
                f"Send test email: Failure ({response.status_code}, {response.body})"
            )
    else:
        print("No sendgrid api key found. Skipping email")
|