diff options
Diffstat (limited to 'python/quote_parsing/parse_emails.py')
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 45 |
1 files changed, 22 insertions, 23 deletions
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py index c9724a1d..a3134dbf 100644 --- a/python/quote_parsing/parse_emails.py +++ b/python/quote_parsing/parse_emails.py @@ -119,10 +119,11 @@ def parse_refline(line): return d -def parse_baml(fh, indextype, series, quotedate, *args): +def parse_baml(fh, index_desc, quotedate, *args): option_stack = {} fwd_index = [] line = "" + index_desc["quotedate"] = quotedate while True: if line == "": try: @@ -131,8 +132,8 @@ def parse_baml(fh, indextype, series, quotedate, *args): break if line.startswith("Ref"): d = parse_refline(line) - d.update({"quotedate": quotedate, "index": indextype, "series": series}) - df, line = parse_baml_block(fh, indextype) + d.update(index_desc) + df, line = parse_baml_block(fh, index_desc["index"]) option_stack[d["expiry"]] = df fwd_index.append(d) else: @@ -455,10 +456,10 @@ def parse_sg(fh, indextype, expiration_dates): return option_stack -def parse_gs(fh, indextype, series, quotedate, ref): +def parse_gs(fh, index_desc, quotedate, ref): option_stack = {} fwd_index = [] - d = {"quotedate": quotedate, "index": indextype, "series": series, "ref": ref} + d = {"quotedate": quotedate, "ref": ref, **index_desc} pat = re.compile(r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)") line = next(fh).strip() @@ -472,7 +473,7 @@ def parse_gs(fh, indextype, series, quotedate, ref): {"fwdspread": fwdspread, "fwdprice": fwdprice, "expiry": expiry} ) fwd_index.append(d.copy()) - option_stack[expiry], line = parse_gs_block(fh, indextype) + option_stack[expiry], line = parse_gs_block(fh, d["index"]) else: logger.error("Can't parse expiry line:", line) elif line.startswith("Assumes"): @@ -488,10 +489,10 @@ def parse_gs(fh, indextype, series, quotedate, ref): return option_stack, fwd_index -def parse_citi(fh, indextype, series, quotedate): +def parse_citi(fh, index_desc, quotedate): option_stack = {} fwd_index = [] - d = {"quotedate": quotedate, "index": indextype, "series": series} + d = {"quotedate": quotedate, **index_desc} pat = re.compile(r"Exp: (\d{2}-\w{3}-\d{2})[^R]*Ref:[^\d]*([\d.]+)") for line in fh: line = line.strip() @@ -502,7 +503,7 @@ def parse_citi(fh, indextype, series, quotedate): expiry = pd.to_datetime(expiry, format="%d-%b-%y") d.update({"ref": ref, "expiry": expiry}) fwd_index.append(d.copy()) - option_stack[expiry] = parse_citi_block(fh, indextype) + option_stack[expiry] = parse_citi_block(fh, d["index"]) else: logger.error("Can't parse expiry line:", line) fwd_index = pd.DataFrame.from_records(fwd_index, index="quotedate") @@ -510,15 +511,15 @@ def parse_citi(fh, indextype, series, quotedate): return option_stack, fwd_index -def parse_cs(fh, indextype, series, quotedate): +def parse_cs(fh, index_desc, quotedate): option_stack = {} fwd_index = [] - d = {"quotedate": quotedate, "index": indextype, "series": series} + d = {"quotedate": quotedate, **index_desc} regex = { "HY": r"Ref:\s*(?P<ref>[\d.]+)\s*Fwd: (?P<fwdprice>[\d.]+)\s*Expiry: (?P<expiry>\d{2}-\w{3}-\d{2})", "IG": r"Ref:\s*(?P<ref>[\d.]+)\s*Fwd: (?P<fwdspread>[\d.]+)\s*Expiry: (?P<expiry>\d{2}-\w{3}-\d{2})\s*Fwd dv01:\s*(?P<fwdbpv>[\d.]*).*", } - pat = re.compile(regex[indextype]) + pat = re.compile(regex[d["index"]]) for line in fh: line = line.strip() @@ -528,7 +529,7 @@ def parse_cs(fh, indextype, series, quotedate): d.update(**m.groupdict()) d["expiry"] = pd.to_datetime(d["expiry"], format="%d-%b-%y") fwd_index.append(d.copy()) - option_stack[d["expiry"]] = parse_cs_block(fh, indextype) + option_stack[d["expiry"]] = parse_cs_block(fh, d["index"]) else: logger.error("Can't parse expiry line:", line, "filename:", fh.name) fwd_index = pd.DataFrame.from_records(fwd_index, index="quotedate") @@ -536,10 +537,10 @@ def parse_cs(fh, indextype, series, quotedate): return option_stack, fwd_index -def parse_bnp(fh, indextype, series, quotedate, expiration_dates): +def parse_bnp(fh, index_desc, quotedate, expiration_dates): option_stack = {} fwd_index = [] - d = {"quotedate": quotedate, "index": indextype, "series": series} + d = {"quotedate": quotedate, **index_desc} regex = r"Ref\s+(?P<ref>[\d.]+)\s+-\s+(?P<expiry>\w{3}\d{2})\s+-\s+Fwd\s+(?P<fwdspread>[\d.]+)" pat = re.compile(regex) for line in fh: @@ -551,14 +552,14 @@ def parse_bnp(fh, indextype, series, quotedate, expiration_dates): m = pat.match(line) if m: d.update(**m.groupdict()) - if indextype == "HY": + if d["index"] == "HY": d["fwdprice"] = d.pop("fwdspread") expiry_month = datetime.datetime.strptime(d["expiry"], "%b%y").month d["expiry"] = next( d for d in expiration_dates if d.month == expiry_month ) fwd_index.append(d.copy()) - option_stack[d["expiry"]] = parse_bnp_block(fh, indextype, c == -1) + option_stack[d["expiry"]] = parse_bnp_block(fh, d["index"], c == -1) else: logger.error(f"Can't parse expiry line: {line} for filename: {fh.name}") if fwd_index: @@ -630,15 +631,13 @@ def parse_email(email, date_received, conn): expiration_dates = list_imm_dates(quotedate) parse_fun = globals()[f"parse_{source.lower()}"] key = (quotedate, indextype, series, source) + index_desc = {"index": indextype, "series": series, "version": version} if source in ["BAML", "CITI", "CS"]: - return (key, parse_fun(fh, indextype, series, quotedate)) + return (key, parse_fun(fh, index_desc, quotedate)) elif source == "GS": - return (key, parse_fun(fh, indextype, series, quotedate, ref)) + return (key, parse_fun(fh, index_desc, quotedate, ref)) elif source == "BNP": - return ( - key, - parse_fun(fh, indextype, series, quotedate, expiration_dates), - ) + return (key, parse_fun(fh, index_desc, quotedate, expiration_dates)) else: option_stack = parse_fun(fh, indextype, expiration_dates) fwd_index = pd.DataFrame( |
