diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_emails.py | 44 |
1 files changed, 28 insertions, 16 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index edf95a56..9f5ec47e 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -1,7 +1,6 @@ import pandas as pd import re import os -import pdb from db import dbconn import psycopg2.sql as sql from download_emails import save_emails, errors @@ -51,7 +50,7 @@ def makedf(r, indextype, quote_source): try: df[k] = pd.to_numeric(df[k]) except ValueError: - pdb.set_trace() + breakpoint() df.set_index('strike', inplace=True) return df @@ -70,8 +69,10 @@ def parse_quotedate(fh, date_received): quotedate = quotedate.replace(year=date_received.year) break else: - raise RuntimeError("can't parse date") + raise RuntimeError("can't parse date from {line}") return quotedate + else: + raise RuntimeError("no date received in the email") def parse_refline(line): @@ -83,7 +84,7 @@ def parse_refline(line): d = m.groupdict() d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y') except AttributeError: - logging.error("something wrong with " + fh.name) + raise RuntimeError(f"can't parse refline {line}") return d @@ -148,7 +149,13 @@ def parse_ms_block(fh, indextype): pay_bid, pay_offer, pay_delta = payer.strip().split() rec_bid, rec_offer, rec_delta = receiver.strip().split() except ValueError: - break + try: + pay_mid, pay_delta = payer.strip().split() + rec_mid, rec_delta = receiver.strip().split() + pay_bid, pay_offer = pay_mid, pay_mid + rec_bid, rec_offer = rec_mid, rec_mid + except ValueError: + raise RuntimeError("Couldn't parse line: {line}") vals = [strike, rec_bid, rec_offer, rec_delta, pay_bid, pay_offer, pay_delta] @@ -165,9 +172,7 @@ def parse_ms_block(fh, indextype): vol, vol_change, be = vol.split() vals += [vol] r.append(vals) - else: - return makedf(r, indextype, "MS") - return None + return makedf(r, indextype, "MS") def parse_nomura_block(fh, indextype): @@ -261,7 +266,9 @@ def parse_ms(fh, indextype, *args): expiry = line.split(" ")[1] expiry = pd.to_datetime(expiry, format="%d-%b-%Y") block = parse_ms_block(fh, indextype) - if block is not None: + if block is None or block.empty: + logging.warning("MS: block is empty for {expiry} expiry") + else: option_stack[expiry] = block return option_stack @@ -338,10 +345,12 @@ def parse_email(email, date_received): indextype, series, ref = m.groups() ref = float(ref) series = int(series) - quotedate = parse_quotedate(fh, date_received) - if quotedate is None: - logging.error("missing quotedate") - continue + try: + quotedate = parse_quotedate(fh, date_received) + except RuntimeError: + logging.warning("couldn't find received date in message: " + "{email.name}, using {date_received}") + quotedate = date_received expiration_dates = list_imm_dates(quotedate) parse_fun = globals()['parse_'+source.lower()] if source == 'BAML': @@ -361,8 +370,7 @@ def parse_email(email, date_received): fwd_index.set_index('quotedate', inplace=True) return (quotedate, indextype, series), (option_stack, fwd_index) else: - raise RuntimeError("can't parse subject line: {0} for email {1}". - format(subject, email.name)) + raise RuntimeError(f"can't parse subject line: {subject} for email {email.name}") def write_todb(swaption_stack, index_data): def gen_sql_str(query, table_name, columns): @@ -383,7 +391,11 @@ def write_todb(swaption_stack, index_data): except StopIteration: continue else: - df = swaption_stack.loc[(t.quotedate, t.index, t.series, t.expiry)] + try: + df = swaption_stack.loc[(t.quotedate, t.index, t.series, t.expiry)] + except KeyError as e: + raise RuntimeError("missing key in swaption_stack: " + f"{t.quotedate}, {t.index}, {t.series}, {t.expiry}") df['ref_id'] = ref_id c.executemany(gen_sql_str(query, "swaption_quotes", df.columns), df.itertuples(index=False)) |
