Spaces:
Running
Running
from sendgrid import SendGridAPIClient | |
from sendgrid.helpers.mail import Mail, Email, To, Content | |
import argparse | |
import yaml | |
import os | |
from dotenv import load_dotenv | |
import openai | |
from relevancy import generate_relevance_score, process_subject_fields | |
from download_new_papers import get_papers | |
from datetime import date | |
# Hackathon quality code. Don't judge too harshly. | |
# Feel free to submit pull requests to improve the code. | |
# Top-level arXiv subject areas: display name -> archive abbreviation.
# "Physics" maps to an empty string; generate_body() rejects it and asks for
# one of the entries in physics_topics instead.
topics = {
    "Physics": "",
    "Mathematics": "math",
    "Computer Science": "cs",
    "Quantitative Biology": "q-bio",
    "Quantitative Finance": "q-fin",
    "Statistics": "stat",
    "Electrical Engineering and Systems Science": "eess",
    "Economics": "econ",
}
# Physics sub-areas: display name -> arXiv archive abbreviation.
# NOTE(review): the "Physics" entry shares its name with the top-level
# "Physics" topic, which generate_body() rejects before consulting this
# table -- confirm whether the "physics" archive is meant to be selectable.
physics_topics = {
    "Astrophysics": "astro-ph",
    "Condensed Matter": "cond-mat",
    "General Relativity and Quantum Cosmology": "gr-qc",
    "High Energy Physics - Experiment": "hep-ex",
    "High Energy Physics - Lattice": "hep-lat",
    "High Energy Physics - Phenomenology": "hep-ph",
    "High Energy Physics - Theory": "hep-th",
    "Mathematical Physics": "math-ph",
    "Nonlinear Sciences": "nlin",
    "Nuclear Experiment": "nucl-ex",
    "Nuclear Theory": "nucl-th",
    "Physics": "physics",
    "Quantum Physics": "quant-ph",
}
# TODO: surely there's a better way
# Sub-categories selectable under each topic, keyed by topic display name.
# Several topics map to the single-entry list ["None"] (presumably a
# placeholder meaning "no sub-categories" -- confirm against the caller).
category_map = {
    # ---- Physics sub-areas ----
    "Astrophysics": [
        "Astrophysics of Galaxies", "Cosmology and Nongalactic Astrophysics",
        "Earth and Planetary Astrophysics",
        "High Energy Astrophysical Phenomena",
        "Instrumentation and Methods for Astrophysics",
        "Solar and Stellar Astrophysics",
    ],
    "Condensed Matter": [
        "Disordered Systems and Neural Networks", "Materials Science",
        "Mesoscale and Nanoscale Physics", "Other Condensed Matter",
        "Quantum Gases", "Soft Condensed Matter", "Statistical Mechanics",
        "Strongly Correlated Electrons", "Superconductivity",
    ],
    "General Relativity and Quantum Cosmology": ["None"],
    "High Energy Physics - Experiment": ["None"],
    "High Energy Physics - Lattice": ["None"],
    "High Energy Physics - Phenomenology": ["None"],
    "High Energy Physics - Theory": ["None"],
    "Mathematical Physics": ["None"],
    "Nonlinear Sciences": [
        "Adaptation and Self-Organizing Systems",
        "Cellular Automata and Lattice Gases", "Chaotic Dynamics",
        "Exactly Solvable and Integrable Systems",
        "Pattern Formation and Solitons",
    ],
    "Nuclear Experiment": ["None"],
    "Nuclear Theory": ["None"],
    "Physics": [
        "Accelerator Physics", "Applied Physics",
        "Atmospheric and Oceanic Physics", "Atomic and Molecular Clusters",
        "Atomic Physics", "Biological Physics", "Chemical Physics",
        "Classical Physics", "Computational Physics",
        "Data Analysis, Statistics and Probability", "Fluid Dynamics",
        "General Physics", "Geophysics", "History and Philosophy of Physics",
        "Instrumentation and Detectors", "Medical Physics", "Optics",
        "Physics and Society", "Physics Education", "Plasma Physics",
        "Popular Physics", "Space Physics",
    ],
    "Quantum Physics": ["None"],
    # ---- Top-level topics ----
    "Mathematics": [
        "Algebraic Geometry", "Algebraic Topology", "Analysis of PDEs",
        "Category Theory", "Classical Analysis and ODEs", "Combinatorics",
        "Commutative Algebra", "Complex Variables", "Differential Geometry",
        "Dynamical Systems", "Functional Analysis", "General Mathematics",
        "General Topology", "Geometric Topology", "Group Theory",
        "History and Overview", "Information Theory", "K-Theory and Homology",
        "Logic", "Mathematical Physics", "Metric Geometry", "Number Theory",
        "Numerical Analysis", "Operator Algebras", "Optimization and Control",
        "Probability", "Quantum Algebra", "Representation Theory",
        "Rings and Algebras", "Spectral Theory", "Statistics Theory",
        "Symplectic Geometry",
    ],
    "Computer Science": [
        "Artificial Intelligence", "Computation and Language",
        "Computational Complexity",
        "Computational Engineering, Finance, and Science",
        "Computational Geometry", "Computer Science and Game Theory",
        "Computer Vision and Pattern Recognition", "Computers and Society",
        "Cryptography and Security", "Data Structures and Algorithms",
        "Databases", "Digital Libraries", "Discrete Mathematics",
        "Distributed, Parallel, and Cluster Computing", "Emerging Technologies",
        "Formal Languages and Automata Theory", "General Literature",
        "Graphics", "Hardware Architecture", "Human-Computer Interaction",
        "Information Retrieval", "Information Theory",
        "Logic in Computer Science", "Machine Learning",
        "Mathematical Software", "Multiagent Systems", "Multimedia",
        "Networking and Internet Architecture",
        "Neural and Evolutionary Computing", "Numerical Analysis",
        "Operating Systems", "Other Computer Science", "Performance",
        "Programming Languages", "Robotics",
        "Social and Information Networks", "Software Engineering",
        "Sound", "Symbolic Computation", "Systems and Control",
    ],
    "Quantitative Biology": [
        "Biomolecules", "Cell Behavior", "Genomics", "Molecular Networks",
        "Neurons and Cognition", "Other Quantitative Biology",
        "Populations and Evolution", "Quantitative Methods",
        "Subcellular Processes", "Tissues and Organs",
    ],
    "Quantitative Finance": [
        "Computational Finance", "Economics", "General Finance",
        "Mathematical Finance", "Portfolio Management",
        "Pricing of Securities", "Risk Management", "Statistical Finance",
        "Trading and Market Microstructure",
    ],
    "Statistics": [
        "Applications", "Computation", "Machine Learning", "Methodology",
        "Other Statistics", "Statistics Theory",
    ],
    "Electrical Engineering and Systems Science": [
        "Audio and Speech Processing", "Image and Video Processing",
        "Signal Processing", "Systems and Control",
    ],
    "Economics": ["Econometrics", "General Economics", "Theoretical Economics"],
}
def _resolve_topic_abbr(topic):
    """Return the arXiv archive abbreviation for *topic*.

    Raises RuntimeError for the bare "Physics" topic and for unknown topics.
    """
    # The bare "Physics" topic is rejected before the sub-area lookup, so the
    # physics_topics entry that is also named "Physics" is never reached here.
    if topic == "Physics":
        raise RuntimeError("You must choose a physics subtopic.")
    if topic in physics_topics:
        return physics_topics[topic]
    if topic in topics:
        return topics[topic]
    raise RuntimeError(f"Invalid topic {topic}")


def _render_scored_paper(paper):
    """Render one relevance-scored paper dict as an HTML fragment."""
    # NOTE(review): field values are interpolated without HTML escaping --
    # confirm whether upstream data can contain markup-significant characters.
    return (
        f'<b>Subject: </b>{paper["subjects"]}<br><b>Title:</b> <a href="{paper["main_page"]}">{paper["title"]}</a><br><b>Authors:</b> {paper["authors"]}<br>'
        f'<b>Score:</b> {paper["Relevancy score"]}<br><b>Reason:</b> {paper["Reasons for match"]}<br>'
        f'<b>Goal:</b> {paper["Goal"]}<br><b>Data</b>: {paper["Data"]}<br><b>Methodology:</b> {paper["Methodology"]}<br>'
        f'<b>Experiments & Results</b>: {paper["Experiments & Results"]}<br><b>Git</b>: {paper["Git"]}<br>'
        f'<b>Discussion & Next steps</b>: {paper["Discussion & Next steps"]}'
    )


def generate_body(topic, categories, interest, threshold):
    """Build the HTML body of the digest for one topic.

    Args:
        topic: Display name of a topic; must be a key of ``physics_topics``
            or ``topics`` other than the bare "Physics".
        categories: Optional iterable of sub-category names. When non-empty,
            each must appear in ``category_map[topic]`` and only papers whose
            processed subjects intersect it are kept.
        interest: Free-text interest statement. When truthy, papers are
            scored through ``generate_relevance_score``; otherwise a plain
            title/author listing is produced.
        threshold: Passed to ``generate_relevance_score`` as
            ``threshold_score``.

    Returns:
        An HTML string with one entry per paper, separated by ``<br><br>``.

    Raises:
        RuntimeError: If the topic is "Physics", is unknown, or a category
            does not belong to the topic.
    """
    abbr = _resolve_topic_abbr(topic)

    # Validate every requested category before fetching anything.
    if categories:
        allowed = category_map[topic]
        for category in categories:
            if category not in allowed:
                raise RuntimeError(f"{category} is not a category of {topic}")

    # Single fetch shared by the filtered and unfiltered paths.
    papers = get_papers(abbr)
    if categories:
        # Build the lookup set once instead of once per paper.
        wanted = set(categories)
        papers = [
            paper
            for paper in papers
            if wanted.intersection(process_subject_fields(paper["subjects"]))
        ]

    if not interest:
        # No interest statement: plain listing, no model call.
        return "<br><br>".join(
            f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}'
            for paper in papers
        )

    relevancy, hallucination = generate_relevance_score(
        papers,
        query={"interest": interest},
        threshold_score=threshold,
        num_paper_in_prompt=2,
    )
    body = "<br><br>".join(_render_scored_paper(paper) for paper in relevancy)
    if hallucination:
        body = (
            "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>"
            + body
        )
    return body
def get_date():
    """Return today's local date as an eight-digit ``DDMMYYYY`` string."""
    return date.today().strftime("%d%m%Y")
if __name__ == "__main__":
    # Script entry point: read the config, build the digest, write it to an
    # HTML file, and e-mail it when a SendGrid API key is configured.

    # Load the .env file.
    load_dotenv()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config", help="yaml config file to use", default="config.yaml"
    )
    args = parser.parse_args()
    # Explicit UTF-8 so non-ASCII text does not depend on the locale default.
    with open(args.config, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    if "OPENAI_API_KEY" not in os.environ:
        raise RuntimeError("No openai api key found")
    openai.api_key = os.environ.get("OPENAI_API_KEY")

    topic = config["topic"]
    categories = config["categories"]
    from_email = os.environ.get("FROM_EMAIL")
    to_email = os.environ.get("TO_EMAIL")
    threshold = config["threshold"]
    interest = config["interest"]

    body = generate_body(topic, categories, interest, threshold)
    today_date = get_date()
    # Paper titles and author names may contain non-ASCII characters, so the
    # digest is written as UTF-8 rather than the platform default encoding.
    with open(f"digest_{today_date}.html", "w", encoding="utf-8") as f:
        f.write(body)

    if os.environ.get("SENDGRID_API_KEY", None):
        sg = SendGridAPIClient(api_key=os.environ.get("SENDGRID_API_KEY"))
        from_email = Email(from_email)  # Change to your verified sender
        to_email = To(to_email)
        subject = date.today().strftime("Personalized arXiv Digest, %d %b %Y")
        content = Content("text/html", body)
        mail = Mail(from_email, to_email, subject, content)
        mail_json = mail.get()

        # Send an HTTP POST request to /mail/send
        response = sg.client.mail.send.post(request_body=mail_json)
        # Only 2xx counts as success; 300 is a redirect, not a success.
        if 200 <= response.status_code < 300:
            print("Send test email: Success!")
        else:
            # The response object exposes the payload as `body` (there is no
            # `text` attribute), and the message must be an f-string for the
            # placeholders to be filled in.
            print(
                f"Send test email: Failure ({response.status_code}, {response.body})"
            )
    else:
        print("No sendgrid api key found. Skipping email")