diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/quote_parsing/download_emails.py | 30 |
1 files changed, 16 insertions, 14 deletions
diff --git a/python/quote_parsing/download_emails.py b/python/quote_parsing/download_emails.py index e61f6637..61081ff6 100644 --- a/python/quote_parsing/download_emails.py +++ b/python/quote_parsing/download_emails.py @@ -12,24 +12,26 @@ from pytz import timezone from gmail_helpers import GmailMessage from email.utils import parsedate_to_datetime + def print_citi_html(email): soup = BeautifulSoup(email.get_content(), features="lxml") - p = soup.find('p') + p = soup.find("p") s = p.next if isinstance(s, NavigableString): l = [unicodedata.normalize("NFKD", s)] else: raise ValueError("weird email") - for br in p.findAll('br'): + for br in p.findAll("br"): s = br.next if isinstance(s, NavigableString): l.append(unicodedata.normalize("NFKD", s)) - elif isinstance(s, Tag) and s.name == 'br': - l.append('\n') + elif isinstance(s, Tag) and s.name == "br": + l.append("\n") else: raise ValueError("weird email") return "\n".join(l) + def save_emails(update=True): """Download new emails that were labeled swaptions.""" DATA_DIR = Path(os.getenv("DATA_DIR")) @@ -43,24 +45,24 @@ def save_emails(update=True): last_history_id = None gm = GmailMessage() - for msg in gm.list_msg_ids('swaptions', last_history_id): - if msg['id'] in existing_msgs: + for msg in gm.list_msg_ids("swaptions", last_history_id): + if msg["id"] in existing_msgs: continue try: - message = gm.from_id(msg['id']) + message = gm.from_id(msg["id"]) logger.info(message.history_id) - subject = message['subject'] - date = parsedate_to_datetime(message['date']) + subject = message["subject"] + date = parsedate_to_datetime(message["date"]) if date.tzinfo is None: - date = date.replace(tzinfo=timezone('utc')) - date = date.astimezone(timezone('America/New_York')) - body = message.get_body('plain') + date = date.replace(tzinfo=timezone("utc")) + date = date.astimezone(timezone("America/New_York")) + body = message.get_body("plain") if body is None: - content = print_citi_html(message.get_body('html')) + content = print_citi_html(message.get_body("html")) else: content = body.get_content() except (KeyError, UnicodeDecodeError, AttributeError) as e: - logger.error("error decoding " + msg['id']) + logger.error("error decoding " + msg["id"]) continue else: save_dir = DATA_DIR / "swaptions" / f"{date:%Y-%m}" |
