diff options
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 81 |
1 files changed, 78 insertions, 3 deletions
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py
index 33a029ea..0b2f05a7 100644
--- a/python/quote_parsing/parse_emails.py
+++ b/python/quote_parsing/parse_emails.py
@@ -53,7 +53,7 @@ def makedf(r, indextype, quote_source):
                 df[col] = df[col].str.rstrip("%").astype("float") / 100
             except ValueError:  # typo in one email
                 df[col] = (
-                    df[col].str.rstrip("%").str.replace("n", "").astype("float") / 100
+                    pd.to_numeric(df[col].str.rstrip("%").str.replace("n", "")) / 100
                 )
     if quote_source == "GS":
         for col in ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]:
@@ -156,6 +156,34 @@ def parse_baml_block(fh, indextype):
     return makedf(r, indextype, "BAML"), line
 
 
+def parse_bnp_block(fh, indextype):
+    """Parse one BNP quote block from *fh* and pass its rows to ``makedf``.
+
+    Skips the header line, then reads rows until a blank line or a line
+    starting with "Ref" is reached; "/" separators are turned into spaces
+    and each row is split on runs of spaces.  For "HY" an empty trailing
+    field is appended to every row.  Reading also stops at the first row
+    with fewer than three fields.
+    """
+    next(fh)  # skip header
+    r = []
+    for line in fh:
+        line = line.strip()
+        # NOTE(review): a terminating "Ref" line is consumed here, so the
+        # caller's loop never sees it -- confirm that blocks are always
+        # separated by a blank line.
+        if line.startswith("Ref") or line == "":
+            break
+        line = re.sub("[/]", " ", line)
+        vals = re.sub(" +", " ", line).rstrip().split(" ")
+        if indextype == "HY":
+            vals += [""]
+        if len(vals) < 3:  # something went wrong
+            break
+        r.append(vals)
+    return makedf(r, indextype, "BNP")
+
+
 def parse_cs_block(fh, indextype):
     next(fh)  # skip header
     r = []
@@ -509,6 +537,47 @@ def parse_cs(fh, indextype, series, quotedate):
     return option_stack, fwd_index
 
 
+def parse_bnp(fh, indextype, series, quotedate, expiration_dates):
+    """Parse the body of a BNP options email.
+
+    Every line starting with "Ref" is matched for a reference level, an
+    expiry tag such as "Mar20" and a forward spread.  The expiry tag is
+    mapped to the first entry of *expiration_dates* falling in the same
+    month, and the quote block that follows is parsed with
+    ``parse_bnp_block``.
+
+    Returns a tuple ``(option_stack, fwd_index)``: a dict of parsed blocks
+    keyed by expiry, and a dataframe of the per-expiry header fields
+    indexed by quotedate with ``quote_source`` set to "BNP".
+    """
+    option_stack = {}
+    fwd_index = []
+    d = {"quotedate": quotedate, "index": indextype, "series": series}
+    regex = (
+        r"Ref\s(?P<ref>[\d.]+) - (?P<expiry>\w{3}\d{2}) - Fwd\s(?P<fwdspread>[\d.]+)"
+    )
+    pat = re.compile(regex)
+    for line in fh:
+        line = line.strip()
+        if line.startswith("Ref"):
+            m = pat.match(line)
+            if m:
+                d.update(**m.groupdict())
+                expiry_month = datetime.datetime.strptime(d["expiry"], "%b%y").month
+                # only the month is compared, the year of the tag is ignored
+                d["expiry"] = next(
+                    exp for exp in expiration_dates if exp.month == expiry_month
+                )
+                fwd_index.append(d.copy())
+                option_stack[d["expiry"]] = parse_bnp_block(fh, indextype)
+            else:
+                # logging formats msg % args, so extra args need placeholders
+                logger.error("Can't parse expiry line: %s filename: %s", line, fh.name)
+    fwd_index = pd.DataFrame.from_records(fwd_index, index="quotedate")
+    fwd_index["quote_source"] = "BNP"
+    return option_stack, fwd_index
+
+
 subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
 subject_ms = re.compile(
     r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)"
@@ -518,16 +587,17 @@ subject_gs = re.compile(r"(?:FW: |Fwd: )?GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.
 subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
 subject_citi = re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y")
 subject_cs = re.compile(r"CS CDX (IG|HY)(\d{2}) Options - Ref = ([\d.]+)[^\d]*")
+subject_bnp = re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*")
 
 
 def parse_email(email, date_received):
     with email.open("rt") as fh:
         subject = fh.readline().lstrip()
 
-        for source in ["BAML", "GS", "MS", "NOM", "SG", "CITI", "CS"]:
+        for source in ["BAML", "GS", "MS", "NOM", "SG", "CITI", "CS", "BNP"]:
             m = globals()[f"subject_{source.lower()}"].match(subject)
             if m:
-                if source in ["BAML", "CITI"]:
+                if source in ["BAML", "CITI", "BNP"]:
                     indextype, series = m.groups()
                 else:
                     indextype, series, ref = m.groups()
@@ -553,6 +623,11 @@ def parse_email(email, date_received):
                     return (key, parse_fun(fh, indextype, series, quotedate))
                 elif source == "GS":
                     return (key, parse_fun(fh, indextype, series, quotedate, ref))
+                elif source == "BNP":
+                    return (
+                        key,
+                        parse_fun(fh, indextype, series, quotedate, expiration_dates),
+                    )
                 else:
                     option_stack = parse_fun(fh, indextype, expiration_dates)
                     fwd_index = pd.DataFrame(
