import base64
import email
import json
import logging
import os
import sys
from email.utils import parsedate_to_datetime
from pathlib import Path

from apiclient import errors
from pytz import timezone

from gmail_helpers import GmailMessage


def save_emails(update=True):
    """Download new emails that were labeled swaptions.

    Each downloaded message is written to
    ``$DATA_DIR/swaptions/<YYYY-mm-dd HH-MM-SS>_<message id>``, where the
    timestamp is the message's Date header converted to America/New_York.
    The file contains the subject, a CRLF, and then the plain-text body.

    Args:
        update: When true, only messages after the history id stored in
            ``$DATA_DIR/.lastHistoryId`` are requested. When false, all
            messages with the label are listed and those whose id already
            appears in a file name under ``$DATA_DIR/swaptions`` are skipped.

    Raises:
        SystemExit: If ``update`` is true and ``.lastHistoryId`` is missing.
    """
    data_dir = Path(os.getenv("DATA_DIR"))
    swaptions_dir = data_dir / "swaptions"
    history_file = data_dir / ".lastHistoryId"

    if update:
        try:
            last_history_id = int(history_file.read_text())
        except FileNotFoundError:
            logging.error("can't find .lastHistoryId file")
            # NOTE(review): exits with status 0 even though this is an error
            # path; kept as-is in case a scheduler relies on it.
            sys.exit()
        existing_msgs = set()
    else:
        # Take the id from the file *name* only: splitting the whole path
        # breaks as soon as any parent directory contains an underscore.
        existing_msgs = {
            entry.name.partition("_")[2]
            for entry in swaptions_dir.iterdir()
            if entry.is_file()
        }
        last_history_id = None

    # History id of the most recently fetched message, if any.
    latest_history_id = None

    for msg in GmailMessage.list_msg_ids('swaptions', last_history_id):
        msg_id = msg['id']
        if msg_id in existing_msgs:
            continue
        try:
            message = GmailMessage.from_id(msg_id)
            latest_history_id = message.history_id
            logging.info(latest_history_id)
            subject = message['subject']
            date = parsedate_to_datetime(message['date'])
            if date.tzinfo is None:
                # A naive datetime is treated as UTC before conversion.
                date = date.replace(tzinfo=timezone('utc'))
            date = date.astimezone(timezone('America/New_York'))
            body = message.get_body('plain')
            content = body.get_content()
        except (KeyError, UnicodeDecodeError, AttributeError,
                TypeError, ValueError):
            # TypeError/ValueError cover a missing or malformed Date header,
            # so one bad message no longer aborts the whole run.
            logging.error("error decoding %s", msg_id)
            continue

        out_path = swaptions_dir / f"{date:%Y-%m-%d %H-%M-%S}_{msg_id}"
        with out_path.open("w") as fh:
            fh.write(subject + "\r\n")
            fh.write(content)

    # Persist the checkpoint only when at least one message was fetched.
    # write_text() requires a str, so coerce in case the id is numeric.
    if latest_history_id is not None:
        history_file.write_text(str(latest_history_id))


if __name__ == '__main__':
    try:
        save_emails()
    except errors.HttpError as e:
        # The incremental request failed; fall back to a full listing that
        # skips messages already saved on disk.
        logging.error(e)
        save_emails(update=False)