diff options
| -rw-r--r-- | python/quote_parsing/__main__.py | 19 | ||||
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 21 |
2 files changed, 26 insertions, 14 deletions
diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py index c97b21ef..95657dea 100644 --- a/python/quote_parsing/__main__.py +++ b/python/quote_parsing/__main__.py @@ -47,6 +47,7 @@ for f in emails: logger.error(f"Something wrong with email: {f.name}") continue swaption_stack[key] = pd.concat(option_stack, names=["expiry", "strike"]) + fwd_index["msg_id"] = int(msg_id, 16) index_data = index_data.append(fwd_index) already_uploaded[msg_id] = key[0] if index_data.empty: @@ -56,14 +57,20 @@ for col in ["fwdbpv", "fwdprice", "fwdspread", "ref"]: index_data[col] = index_data[col].astype("float") index_data["index"] = index_data["index"].astype("category") -swaption_stack = pd.concat(swaption_stack, names=["quotedate", "index", "series"]) -swaption_stack = swaption_stack.reset_index() -swaption_stack = swaption_stack.drop_duplicates( - ["quotedate", "index", "series", "expiry", "strike"] +index_names = ["quotedate", "index", "series", "quote_source"] +swaption_stack = pd.concat(swaption_stack, names=index_names) +dup = swaption_stack.index.duplicated() +if dup.any(): + logger.warning("duplicated data") + swaption_stack = swaption_stack[~dup] +swaption_stack = swaption_stack.reset_index().set_index( + ["quotedate", "index", "series", "expiry", "quote_source"] ) -swaption_stack = swaption_stack.set_index(["quotedate", "index", "series", "expiry"]) +swaption_stack = swaption_stack.sort_index() index_data = index_data.reset_index() -index_data = index_data.drop_duplicates(["quotedate", "index", "series", "expiry"]) +index_data = index_data.drop_duplicates( + ["quotedate", "index", "series", "expiry", "quote_source"] +) from utils.db import serenitas_pool conn = serenitas_pool.getconn() diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py index 961e47b5..47a17e9e 100644 --- a/python/quote_parsing/parse_emails.py +++ b/python/quote_parsing/parse_emails.py @@ -408,7 +408,7 @@ subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)" subject_citi = re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y") def parse_email(email, date_received): - with open(email.path, "rt") as fh: + with email.open("rt") as fh: subject = fh.readline().lstrip() for source in ['BAML', 'GS', 'MS', 'NOM', 'SG', 'CITI']: @@ -431,11 +431,12 @@ def parse_email(email, date_received): expiration_dates = list_imm_dates(quotedate) parse_fun = globals()[f'parse_{source.lower()}'] + key = (quotedate, indextype, series, source) if source in ['BAML', 'CITI']: - return (quotedate, indextype, series), \ + return (key, parse_fun(fh, indextype, series, quotedate)) parse_fun(fh, indextype, series, quotedate) elif source == "GS": - return (quotedate, indextype, series), \ + return (key, parse_fun(fh, indextype, series, quotedate, ref)) parse_fun(fh, indextype, series, quotedate, ref) else: option_stack = parse_fun(fh, indextype, expiration_dates) @@ -446,7 +447,7 @@ def parse_email(email, date_received): 'expiry': list(option_stack.keys()), 'quote_source': source}) fwd_index.set_index('quotedate', inplace=True) - return (quotedate, indextype, series), (option_stack, fwd_index) + return (key, (option_stack, fwd_index)) else: raise RuntimeError(f"can't parse subject line: {subject} for email {email.name}") @@ -469,12 +470,16 @@ def write_todb(swaption_stack, index_data, conn): continue else: try: - df = swaption_stack.loc[(t.quotedate, t.index, t.series, t.expiry),] + df = swaption_stack.loc[ + (t.quotedate, t.index, t.series, t.expiry, t.quote_source), + ] except KeyError as e: - logger.warning("missing key in swaption_stack: " - f"{t.quotedate}, {t.index}, {t.series}, {t.expiry}") + logger.warning( + "missing key in swaption_stack: " + f"{t.quotedate}, {t.index}, {t.series}, {t.expiry}, {t.quote_source}" + ) continue - except IndexingError: + except IndexError: breakpoint() df['ref_id'] = ref_id c.executemany(gen_sql_str(query, "swaption_quotes", df.columns), |
