diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/quote_parsing/__main__.py | 3 | ||||
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 55 |
2 files changed, 33 insertions, 25 deletions
diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py index 1d84c44c..5fc6cd71 100644 --- a/python/quote_parsing/__main__.py +++ b/python/quote_parsing/__main__.py @@ -69,7 +69,7 @@ for col in ["fwdbpv", "fwdprice", "fwdspread", "ref"]: index_data[col] = pd.to_numeric(index_data[col]) index_data["index"] = index_data["index"].astype("category") -index_names = ["quotedate", "index", "series", "quote_source"] +index_names = ["quotedate", "index", "quote_source"] swaption_stack = pd.concat(swaption_stack, names=index_names, sort=False) dup = swaption_stack.index.duplicated() if dup.any(): @@ -84,7 +84,6 @@ index_data = index_data.reset_index() index_data = index_data.drop_duplicates( ["quotedate", "index", "series", "version", "expiry", "quote_source"] ) -index_data = index_data.sort_values(by=["expiry", "version"]) write_todb(swaption_stack, index_data, conn) serenitas_pool.putconn(conn) diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py index f3fc2e21..e70fd2b1 100644 --- a/python/quote_parsing/parse_emails.py +++ b/python/quote_parsing/parse_emails.py @@ -147,7 +147,9 @@ def parse_baml(fh, index_desc, *args): d = parse_refline(line) d.update(index_desc) df, line = parse_baml_block(fh, index_desc["index"]) - option_stack[(d["expiry"], index_desc["version"])] = df + option_stack[ + (d["expiry"], index_desc["series"], index_desc["version"]) + ] = df fwd_index.append(d) else: line = "" @@ -444,10 +446,10 @@ def parse_jpm_block(fh, indextype): ] rec_data = rec_data.replace("/", " ") if rec_strike != "-": - receivers[rec_strike] = rec_data.split()[:-2] + receivers[rec_strike] = rec_data.split()[:4] pay_data = pay_data.replace("/", " ") if pay_strike != "-": - payers[pay_strike] = pay_data.split()[:-2] + payers[pay_strike] = pay_data.split()[:4] cols = ["bid", "offer", "delta", "vol"] pay = pd.DataFrame.from_dict(payers, orient="index", columns=cols).add_prefix( @@ -491,7 +493,9 @@ def parse_ms(fh, index_desc, *args): if block is None or block.empty: logger.warning("MS: block is empty for {expiry} expiry") else: - option_stack[(expiry, index_desc["version"])] = block + option_stack[ + (expiry, index_desc["series"], index_desc["version"]) + ] = block return option_stack, fwd_index @@ -503,7 +507,7 @@ def parse_nom(fh, index_desc, *args): expiry = line.split(" ")[0] expiry = pd.to_datetime(expiry, format="%d-%b-%y") next_line, df = parse_nomura_block(fh, index_desc["index"]) - option_stack[(expiry, index_desc["version"])] = df + option_stack[(expiry, index_desc["series"], index_desc["version"])] = df fwd_index.append({"expiry": expiry, **index_desc}) if next_line: if "EXPIRY" in next_line: @@ -527,7 +531,7 @@ def parse_sg(fh, index_desc): line = line.rstrip() if line.startswith("Type"): expiry, df = parse_sg_block(fh, index_desc["index"], expiration_dates) - option_stack[(expiry, index_desc["version"])] = df + option_stack[(expiry, index_desc["series"], index_desc["version"])] = df fwd_index.append({"expiry": expiry, **index_desc}) return option_stack, fwd_index @@ -555,7 +559,9 @@ def parse_gs(fh, index_desc): ) try: ( - option_stack[(expiry, index_desc["version"])], + option_stack[ + (expiry, index_desc["series"], index_desc["version"]) + ], line, ) = parse_gs_block(fh, index_desc["index"]) except IndexError as e: @@ -586,9 +592,9 @@ def parse_citi(fh, index_desc): expiry, ref = m.groups() expiry = pd.to_datetime(expiry, format="%d-%b-%y") fwd_index.append({"ref": ref, "expiry": expiry, **index_desc}) - option_stack[(expiry, index_desc["version"])] = parse_citi_block( - fh, index_desc["index"] - ) + option_stack[ + (expiry, index_desc["series"], index_desc["version"]) + ] = parse_citi_block(fh, index_desc["index"]) else: logger.error("Can't parse expiry line: %s", line) return option_stack, fwd_index @@ -610,9 +616,9 @@ def parse_cs(fh, index_desc): d = m.groupdict() d["expiry"] = pd.to_datetime(d["expiry"], format="%d-%b-%y") fwd_index.append({**index_desc, **d}) - option_stack[(d["expiry"], index_desc["version"])] = parse_cs_block( - fh, index_desc["index"] - ) + option_stack[ + (d["expiry"], index_desc["series"], index_desc["version"]) + ] = parse_cs_block(fh, index_desc["index"]) else: logger.error( "Can't parse expiry line: %s for filename: %s", line, fh.name @@ -641,9 +647,9 @@ def parse_bnp(fh, index_desc): d for d in expiration_dates if d.month == expiry_month ) fwd_index.append({**index_desc, **d}) - option_stack[(d["expiry"], index_desc["version"])] = parse_bnp_block( - fh, index_desc["index"], c == -1 - ) + option_stack[ + (d["expiry"], index_desc["series"], index_desc["version"]) + ] = parse_bnp_block(fh, index_desc["index"], c == -1) else: logger.error(f"Can't parse expiry line: {line} for filename: {fh.name}") return option_stack, fwd_index @@ -652,7 +658,7 @@ def parse_bnp(fh, index_desc): def parse_jpm(fh, index_desc): option_stack = {} fwd_index = [] - regex = r"JPM (CDX|iTrx) Options: (HY|IG|MAIN|XOVER|FINSEN) \(\w\d+V(?P<version>\d+)\) (?P<expiry>[\d]+-[\w]+-[\d]+) \*\* Fwd @(?P<fwdref>[\d.]+)" + regex = r"JPM (CDX|iTrx) Options: (HY|IG|MAIN|XOVER|FINSEN) \(\w(?P<series>\d+)V(?P<version>\d+)\) (?P<expiry>[\d]+-[\w]+-[\d]+) \*\* Fwd @(?P<fwdref>[\d.]+)" pat = re.compile(regex) line = next(fh).strip() while True: @@ -666,9 +672,10 @@ def parse_jpm(fh, index_desc): fwd_index.append({**index_desc, **d}) try: - option_stack[(d["expiry"], d["version"])], line = parse_jpm_block( - fh, index_desc["index"] - ) + ( + option_stack[(d["expiry"], d["series"], d["version"])], + line, + ) = parse_jpm_block(fh, index_desc["index"]) except IndexError as e: logger.debug(traceback.format_exc()) logger.error(f"Something is wrong with file {Path(fh.name).stem}") @@ -736,7 +743,9 @@ def parse_gs_eu(fh, index_desc): expiry = pd.to_datetime(expiry, format="%b%y") try: ( - option_stack[(expiry, index_desc["version"])], + option_stack[ + (expiry, index_desc["series"], index_desc["version"]) + ], line, ) = parse_gs_eu_block(fh, index_desc["index"]) fwd_index.append({**index_desc, "expiry": expiry}) @@ -784,7 +793,7 @@ regex_dict = { ): ("CS", parse_cs), re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*"): ("BNP", parse_bnp), re.compile( - r"JPM (?:CDX|iTrx) Options: (?:CDX|ITRAXX).(IG|HY|XOVER|MAIN|FINSEN) S(\d+) 5Y (?:V2&V1 )?\S+(?:-\S+)? \[ref ([\d.]*)\]" + r"JPM (?:CDX|iTrx) Options: (?:CDX|ITRAXX).(IG|HY|XOVER|MAIN|FINSEN) S(\d+)(?:\/\d+)? 5Y (?:V2&V1 )?\S+(?:-\S+)? \[ref ([\d.]*)\]" ): ("JPM", parse_jpm), re.compile( r"GS Options - iTraxx (Xover|Main|FinSen)(\d+) 5Y V(\d+) Options Run - Ref ([\d.]+)" @@ -839,7 +848,7 @@ def parse_email(email: Path, date_received: datetime.date, conn): fh.seek(cur_pos) if version is None: version = get_version(indextype, series, quotedate) - key = (quotedate, indextype, series, source) + key = (quotedate, indextype, source) index_desc = { "quotedate": quotedate, "index": indextype, |
