diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 51 |
1 files changed, 47 insertions, 4 deletions
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py
index fe5b7698..9c7eea74 100644
--- a/python/quote_parsing/parse_emails.py
+++ b/python/quote_parsing/parse_emails.py
@@ -157,6 +157,25 @@ def parse_baml_block(fh, indextype):
     return makedf(r, indextype, "BAML"), line
 
 
+def parse_cs_block(fh, indextype):
+    next(fh)  # skip header
+    r = []
+    for line in fh:
+        line = line.strip()
+        if line.startswith("Ref") or line == "":  # end of this expiry's strike table
+            break  # NOTE(review): a "Ref" line hit here is already consumed from fh, so parse_cs never sees it -- confirm blocks are blank-line separated
+        line = re.sub("[/|]", " ", line)
+        vals = re.sub(" +", " ", line).rstrip().split(" ")
+        strike, *rest = vals
+        # CS quotes payer first, so we need to move things around a bit
+        if indextype == "IG":
+            vals = (strike, *rest[3:6], *rest[:3], rest[6])
+        elif indextype == "HY":
+            vals = (strike, *rest[3:6], *rest[:3], *rest[6:8])
+        r.append(vals)
+    return makedf(r, indextype, "CS")
+
+
 def parse_ms_block(fh, indextype):
     line = next(fh)  # skip header
     if line.strip() == "":  # empty block
@@ -456,7 +475,7 @@ def parse_citi(fh, indextype, series, quotedate):
                 fwd_index.append(d.copy())
                 option_stack[expiry] = parse_citi_block(fh, indextype)
             else:
-                logger.error("Cant't parse expiry line:", line)
+                logger.error("Can't parse expiry line: %s", line)
     fwd_index = pd.DataFrame.from_records(fwd_index, index="quotedate")
     fwd_index["quote_source"] = "CITI"
     return option_stack, fwd_index
@@ -466,7 +485,31 @@ def parse_cs(fh, indextype, series, quotedate):
     option_stack = {}
     fwd_index = []
     d = {"quotedate": quotedate, "index": indextype, "series": series}
-    return
+    regex_str = r"Ref:\s*([\d.]+)\s*Fwd: ([\d.]+)\s*Expiry: (\d{2}-\w{3}-\d{2})"
+    if indextype == "IG":
+        regex_str += r"\s*Fwd dv01: ([\d.]+).*"
+    pat = re.compile(regex_str)
+
+    for line in fh:
+        line = line.strip()
+        if line.startswith("Ref"):
+            m = pat.match(line)
+            if m:
+                if indextype == "IG":
+                    ref, fwd, expiry, fwd_dv01 = m.groups()
+                else:
+                    ref, fwd, expiry = m.groups()
+                expiry = pd.to_datetime(expiry, format="%d-%b-%y")
+                d.update({"ref": ref, "expiry": expiry, "fwd": fwd})
+                if indextype == "IG":
+                    d.update({"fwd_dv01": fwd_dv01})  # only the IG pattern captures a fwd dv01 group
+                fwd_index.append(d.copy())
+                option_stack[expiry] = parse_cs_block(fh, indextype)
+            else:
+                logger.error("Can't parse expiry line: %s", line)
+    fwd_index = pd.DataFrame.from_records(fwd_index, index="quotedate")
+    fwd_index["quote_source"] = "CS"
+    return option_stack, fwd_index
 
 
 subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
@@ -484,7 +527,7 @@ def parse_email(email, date_received):
     with email.open("rt") as fh:
         subject = fh.readline().lstrip()
 
-        for source in ["BAML", "GS", "MS", "NOM", "SG", "CITI"]:
+        for source in ["BAML", "GS", "MS", "NOM", "SG", "CITI", "CS"]:
             m = globals()[f"subject_{source.lower()}"].match(subject)
             if m:
                 if source in ["BAML", "CITI"]:
@@ -509,7 +552,7 @@ def parse_email(email, date_received):
                     expiration_dates = list_imm_dates(quotedate)
                 parse_fun = globals()[f"parse_{source.lower()}"]
                 key = (quotedate, indextype, series, source)
-                if source in ["BAML", "CITI"]:
+                if source in ["BAML", "CITI", "CS"]:
                     return (key, parse_fun(fh, indextype, series, quotedate))
                 elif source == "GS":
                     return (key, parse_fun(fh, indextype, series, quotedate, ref))
