diff options
Diffstat (limited to 'python/quote_parsing/parse_emails.py')
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 68 |
1 files changed, 26 insertions, 42 deletions
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py index eac25f53..4300b267 100644 --- a/python/quote_parsing/parse_emails.py +++ b/python/quote_parsing/parse_emails.py @@ -132,7 +132,7 @@ def parse_refline(line): return d -def parse_baml_us(fh, index_desc, *args): +def parse_baml(fh, index_desc, *args): option_stack = {} fwd_index = [] line = "" @@ -474,7 +474,7 @@ def parse_jpm_block(fh, indextype): return makedf(df, indextype, "JPM"), line -def parse_ms_us(fh, index_desc, *args): +def parse_ms(fh, index_desc, *args): option_stack = {} fwd_index = [] for line in fh: @@ -491,7 +491,7 @@ def parse_ms_us(fh, index_desc, *args): return option_stack, fwd_index -def parse_nom_us(fh, index_desc, *args): +def parse_nom(fh, index_desc, *args): option_stack = {} fwd_index = [] @@ -514,7 +514,7 @@ def parse_nom_us(fh, index_desc, *args): return option_stack, fwd_index -def parse_sg_us(fh, index_desc): +def parse_sg(fh, index_desc): option_stack = {} fwd_index = [] @@ -528,7 +528,7 @@ def parse_sg_us(fh, index_desc): return option_stack, fwd_index -def parse_gs_us(fh, index_desc): +def parse_gs(fh, index_desc): option_stack = {} fwd_index = [] pat = re.compile(r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)") @@ -571,21 +571,7 @@ def parse_gs_us(fh, index_desc): return option_stack, fwd_index -def parse_gs_eu(fh, index_desc): - option_stack = {} - fwd_index = [] - - pat = re.compile(r"(\w{3}\d{2})") - line = next(fh).strip() - while True: - if ("|" in line) and ("STK" not in line): - line = next(fh).strip() - if m := pat.match(line): - expiry = m.groups() - expiry = pd.to_datetime(expiry, format="%b%y") - - -def parse_citi_us(fh, index_desc): +def parse_citi(fh, index_desc): option_stack = {} fwd_index = [] pat = re.compile(r"Exp: (\d{2}-\w{3}-\d{2})[^R]*Ref:[^\d]*([\d.]+)") @@ -604,7 +590,7 @@ def parse_citi_us(fh, index_desc): return option_stack, fwd_index -def parse_cs_us(fh, index_desc): +def parse_cs(fh, index_desc): option_stack = {} fwd_index = [] regex = { @@ -628,7 +614,7 @@ def parse_cs_us(fh, index_desc): return option_stack, fwd_index -def parse_bnp_us(fh, index_desc): +def parse_bnp(fh, index_desc): option_stack = {} fwd_index = [] regex = r"Ref\s+(?P<ref>[\d.]+)\s+-\s+(?P<expiry>\w{3}\d{2})\s+-\s+Fwd\s+(?P<fwdspread>[\d.]+)" @@ -657,7 +643,7 @@ def parse_bnp_us(fh, index_desc): return option_stack, fwd_index -def parse_jpm_useu(fh, index_desc): +def parse_jpm(fh, index_desc): option_stack = {} fwd_index = [] regex = r"JPM (CDX|iTrx) Options: (HY|IG|MAIN|XOVER) \(\w\d+V(?P<version>\d+)\) (?P<expiry>[\d]+-[\w]+-[\d]+) \*\* Fwd @(?P<fwdref>[\d.]+)" @@ -671,7 +657,6 @@ def parse_jpm_useu(fh, index_desc): "fwdref" ) d["expiry"] = pd.to_datetime(d["expiry"], format="%d-%b-%y") - index_desc["version"] = d["version"] fwd_index.append({**index_desc, **d}) try: @@ -697,26 +682,26 @@ def parse_jpm_useu(fh, index_desc): # subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\D{2})(\d{1,2})\s") regex_dict = { - ("BAML", "US"): re.compile( + re.compile( r"(?:Fwd:){0,2}(?:BofA )?(\D{2})(\d{1,2}).*Ref[^\d]*([\d.]+)" - ), - ("MS", "US"): re.compile( + ): ("BAML", parse_baml), + re.compile( r"[^$]*\${1,2} MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)" - ), - ("NOM", "US"): re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)"), - ("GS", "US"): re.compile( + ): ("MS", parse_ms), + re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)"): ("NOM", parse_nom), + re.compile( r"(?:FW: |Fwd: )?GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)" - ), - ("SG", "US"): re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)"), - ("CITI", "US"): re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y"), - ("CS", "US"): re.compile( + ): ("GS", parse_gs), + re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)"): ("SG", parse_sg), + re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y"): ("CITI", parse_citi), + re.compile( r"CS CDX (?P<index>IG|HY)(?P<series>\d{2})_?v?(?P<version>\d)? Options -\s+(?:\d{2}/\d{2}/\d{2}\s+)?Ref = (?P<ref>[\d.]+)[^\d]*" - ), - ("BNP", "US"): re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*"), + ): ("CS", parse_cs), + re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*"): ("BNP", parse_bnp), # JPM works on both europe and us so we won't need to make changes - ("JPM", "USEU"): re.compile( - r"JPM (?:CDX|iTrx) Options: (?:CDX|ITRAXX).(IG|HY|XOVER|MAIN) S(\d+) 5Y (?:V2&V1 )?\S+-\S+ \[ref ([\d.]*)\].*" - ), + re.compile( + r"JPM (?:CDX|iTrx) Options: (?:CDX|ITRAXX).(IG|HY|XOVER|MAIN) S(\d+) 5Y (?:V2&V1 )?\S+-\S+ \[ref ([\d.]*)\]" + ): ("JPM", parse_jpm), } @@ -736,7 +721,7 @@ def parse_email(email: Path, date_received: datetime.date, conn): with email.open("rt") as fh: subject = fh.readline().lstrip() - for (source, region), regex in regex_dict.items(): + for regex, (source, parse_fun) in regex_dict.items(): if (m := regex.match(subject)) : version = None if source in ["CITI", "BNP"]: @@ -767,7 +752,7 @@ def parse_email(email: Path, date_received: datetime.date, conn): fh.seek(cur_pos) if version is None: version = get_version(indextype, series, quotedate) - parse_fun = globals()[f"parse_{source.lower()}_{region.lower()}"] + key = (quotedate, indextype, series, source) index_desc = { "quotedate": quotedate, "index": indextype, @@ -779,7 +764,6 @@ def parse_email(email: Path, date_received: datetime.date, conn): if source in ["BNP", "SG"]: index_desc["expiration_dates"] = list_imm_dates(quotedate) option_stack, fwd_index = parse_fun(fh, index_desc) - key = (quotedate, indextype, series, source) if fwd_index: fwd_index = pd.DataFrame.from_records(fwd_index, index="quotedate") fwd_index["quote_source"] = source |
