about summary refs log tree commit diff stats
path: root/python/quote_parsing
diff options
context:
space:
mode:
Diffstat (limited to 'python/quote_parsing')
-rw-r--r-- python/quote_parsing/download_emails.py 30
1 files changed, 16 insertions, 14 deletions
diff --git a/python/quote_parsing/download_emails.py b/python/quote_parsing/download_emails.py
index e61f6637..61081ff6 100644
--- a/python/quote_parsing/download_emails.py
+++ b/python/quote_parsing/download_emails.py
@@ -12,24 +12,26 @@ from pytz import timezone
from gmail_helpers import GmailMessage
from email.utils import parsedate_to_datetime
+
def print_citi_html(email):
soup = BeautifulSoup(email.get_content(), features="lxml")
- p = soup.find('p')
+ p = soup.find("p")
s = p.next
if isinstance(s, NavigableString):
l = [unicodedata.normalize("NFKD", s)]
else:
raise ValueError("weird email")
- for br in p.findAll('br'):
+ for br in p.findAll("br"):
s = br.next
if isinstance(s, NavigableString):
l.append(unicodedata.normalize("NFKD", s))
- elif isinstance(s, Tag) and s.name == 'br':
- l.append('\n')
+ elif isinstance(s, Tag) and s.name == "br":
+ l.append("\n")
else:
raise ValueError("weird email")
return "\n".join(l)
+
def save_emails(update=True):
"""Download new emails that were labeled swaptions."""
DATA_DIR = Path(os.getenv("DATA_DIR"))
@@ -43,24 +45,24 @@ def save_emails(update=True):
last_history_id = None
gm = GmailMessage()
- for msg in gm.list_msg_ids('swaptions', last_history_id):
- if msg['id'] in existing_msgs:
+ for msg in gm.list_msg_ids("swaptions", last_history_id):
+ if msg["id"] in existing_msgs:
continue
try:
- message = gm.from_id(msg['id'])
+ message = gm.from_id(msg["id"])
logger.info(message.history_id)
- subject = message['subject']
- date = parsedate_to_datetime(message['date'])
+ subject = message["subject"]
+ date = parsedate_to_datetime(message["date"])
if date.tzinfo is None:
- date = date.replace(tzinfo=timezone('utc'))
- date = date.astimezone(timezone('America/New_York'))
- body = message.get_body('plain')
+ date = date.replace(tzinfo=timezone("utc"))
+ date = date.astimezone(timezone("America/New_York"))
+ body = message.get_body("plain")
if body is None:
- content = print_citi_html(message.get_body('html'))
+ content = print_citi_html(message.get_body("html"))
else:
content = body.get_content()
except (KeyError, UnicodeDecodeError, AttributeError) as e:
- logger.error("error decoding " + msg['id'])
+ logger.error("error decoding " + msg["id"])
continue
else:
save_dir = DATA_DIR / "swaptions" / f"{date:%Y-%m}"