diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 45 |
1 files changed, 33 insertions, 12 deletions
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py index d5538204..551db853 100644 --- a/python/quote_parsing/parse_emails.py +++ b/python/quote_parsing/parse_emails.py @@ -47,8 +47,8 @@ def makedf(r, indextype, quote_source): "price_vol", ] if quote_source == "BAML": - cols.append("gamma") - if quote_source == "GS": + cols.append("gamma") # (ref_id,) = next(c) + if quote_source == "GS" and region == "US": cols.append("tail") df = pd.DataFrame.from_records(r, columns=cols) for col in ["delta_rec", "delta_pay", "vol", "price_vol", "gamma", "tail"]: @@ -705,14 +705,14 @@ def parse_gs_block_eu(fh, indextype): strike, rec_bid, rec_offer, - delta, + 1 + delta_pay, pay_bid, pay_offer, - delta, + delta_pay, vol, ] ) - return makedf(r, indextype, "GS") + return makedf(r, indextype, "GS", "EU") def parse_gs_eu(fh, index_desc): @@ -727,7 +727,19 @@ def parse_gs_eu(fh, index_desc): if m := pat.match(line): expiry = m.groups() expiry = pd.to_datetime(expiry, format="%b%y") - parse_gs_block_eu + option_stack[(expiry, index_desc["version"])] = parse_gs_block_eu( + fh, indexdesc["index"] + ) + else: + logger.error("Can't parse expiry line:", line, "filename:", fh.name) + elif line.startswith("Keywords"): + break + else: + try: + line = next(fh).strip() + except StopIteration: + break + return options_stack, fwd_index # subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\D{2})(\d{1,2})\s") @@ -735,32 +747,39 @@ def parse_gs_eu(fh, index_desc): regex_dict = { re.compile(r"(?:Fwd:){0,2}(?:BofA )?(\D{2})(\d{1,2}).*Ref[^\d]*([\d.]+)"): ( "BAML", + "US", parse_baml, ), re.compile( r"[^$]*\${1,2} MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)" - ): ("MS", parse_ms), + ): ("MS", "US", parse_ms), re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)"): ( "NOM", + "US", parse_nom, ), re.compile(r"(?:FW: |Fwd: )?GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)"): ( "GS", + "US", parse_gs, ), re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)"): ( "SG", + "US", parse_sg, ), - re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y"): ("CITI", parse_citi), + re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y"): ("CITI", "US", parse_citi), re.compile( r"CS CDX (?P<index>IG|HY)(?P<series>\d{2})_?v?(?P<version>\d)? Options -\s+(?:\d{2}/\d{2}/\d{2}\s+)?Ref = (?P<ref>[\d.]+)[^\d]*" - ): ("CS", parse_cs), - re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*"): ("BNP", parse_bnp), + ): ("CS", "US", parse_cs), + re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*"): ("BNP", "US", parse_bnp), # JPM works on both europe and us so we won't need to make changes re.compile( r"JPM (?:CDX|iTrx) Options: (?:CDX|ITRAXX).(IG|HY|XOVER|MAIN) S(\d+) 5Y (?:V2&V1 )?\S+-\S+ \[ref ([\d.]*)\]" - ): ("JPM", parse_jpm), + ): ("JPM", "USEU", parse_jpm), + re.compile( + r"GS Options - iTraxx ([A-Za-z]+)(\d+) 5Y V(\d+) Options Run - Ref ([\d.]+)" + ): ("GS", "EU", parse_gs_eu), } @@ -780,7 +799,7 @@ def parse_email(email: Path, date_received: datetime.date, conn): with email.open("rt") as fh: subject = fh.readline().lstrip() - for regex, (source, parse_fun) in regex_dict.items(): + for regex, (source, region, parse_fun) in regex_dict.items(): if (m := regex.match(subject)) : version = None if source in ["CITI", "BNP"]: @@ -791,6 +810,8 @@ def parse_email(email: Path, date_received: datetime.date, conn): indextype = d["index"] series = d["series"] ref = float(d["ref"]) + elif source == "GS" and region == "EU": + indextype, series, version, ref else: indextype, series, ref = m.groups() indextype = "EU" if indextype == "MAIN" else indextype |
