1 files changed, 54 insertions, 3 deletions
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py
index 33a029ea..0b2f05a7 100644
--- a/python/quote_parsing/parse_emails.py
+++ b/python/quote_parsing/parse_emails.py
@@ -53,7 +53,7 @@ def makedf(r, indextype, quote_source):
                 df[col] = df[col].str.rstrip("%").astype("float") / 100
             except ValueError:  # typo in one email
                 df[col] = (
-                    df[col].str.rstrip("%").str.replace("n", "").astype("float") / 100
+                    pd.to_numeric(df[col].str.rstrip("%").str.replace("n", "")) / 100
                 )
     if quote_source == "GS":
         for col in ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]:
@@ -156,6 +156,24 @@ def parse_baml_block(fh, indextype):
     return makedf(r, indextype, "BAML"), line
 
 
+def parse_bnp_block(fh, indextype):
+    """Read one BNP quote block from `fh` and hand its rows to makedf.
+
+    Stops at a blank line, a "Ref" line, or a row yielding fewer than 3 values."""
+    next(fh)  # skip header
+    rows = []
+    for raw in fh:
+        text = raw.strip()
+        if not text or text.startswith("Ref"):
+            break  # the terminating line is consumed and not returned to the caller
+        fields = re.sub(" +", " ", text.replace("/", " ")).rstrip().split(" ")
+        fields = fields + [""] if indextype == "HY" else fields
+        if len(fields) < 3:  # something went wrong
+            break
+        rows.append(fields)
+    return makedf(rows, indextype, "BNP")
+
+
 def parse_cs_block(fh, indextype):
     next(fh)  # skip header
     r = []
@@ -509,6 +527,33 @@ def parse_cs(fh, indextype, series, quotedate):
     return option_stack, fwd_index
 
 
+def parse_bnp(fh, indextype, series, quotedate, expiration_dates):
+    """Parse a BNP email body and return (option_stack, fwd_index).
+
+    Each "Ref <ref> - <MonYY> - Fwd <spread>" line starts a quote block."""
+    option_stack, fwd_index = {}, []
+    d = {"quotedate": quotedate, "index": indextype, "series": series}
+    pat = re.compile(
+        r"Ref\s(?P<ref>[\d.]+) - (?P<expiry>\w{3}\d{2}) - Fwd\s(?P<fwdspread>[\d.]+)"
+    )
+    for line in fh:
+        line = line.strip()  # tolerate leading whitespace, as parse_bnp_block does
+        if line.startswith("Ref"):
+            m = pat.match(line)
+            if m:
+                d.update(m.groupdict())
+                month = datetime.datetime.strptime(d["expiry"], "%b%y").month
+                # expiry is resolved by month only: the first matching entry wins
+                d["expiry"] = next(dt for dt in expiration_dates if dt.month == month)
+                fwd_index.append(d.copy())
+                option_stack[d["expiry"]] = parse_bnp_block(fh, indextype)
+            else:
+                logger.error("Can't parse expiry line: %s filename: %s", line, fh.name)
+    fwd_index = pd.DataFrame.from_records(fwd_index, index="quotedate")
+    fwd_index["quote_source"] = "BNP"
+    return option_stack, fwd_index
+
+
 subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
 subject_ms = re.compile(
     r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)"
@@ -518,16 +563,17 @@ subject_gs = re.compile(r"(?:FW: |Fwd: )?GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.
 subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
 subject_citi = re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y")
 subject_cs = re.compile(r"CS CDX (IG|HY)(\d{2}) Options - Ref = ([\d.]+)[^\d]*")
+subject_bnp = re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*")  # groups: index type, series
 
 
 def parse_email(email, date_received):
     with email.open("rt") as fh:
         subject = fh.readline().lstrip()
 
-        for source in ["BAML", "GS", "MS", "NOM", "SG", "CITI", "CS"]:
+        for source in ["BAML", "GS", "MS", "NOM", "SG", "CITI", "CS", "BNP"]:
             m = globals()[f"subject_{source.lower()}"].match(subject)
             if m:
-                if source in ["BAML", "CITI"]:
+                if source in ["BAML", "CITI", "BNP"]:
                     indextype, series = m.groups()
                 else:
                     indextype, series, ref = m.groups()
@@ -553,6 +599,11 @@ def parse_email(email, date_received):
                     return (key, parse_fun(fh, indextype, series, quotedate))
                 elif source == "GS":
                     return (key, parse_fun(fh, indextype, series, quotedate, ref))
+                elif source == "BNP":
+                    return (
+                        key,
+                        parse_fun(fh, indextype, series, quotedate, expiration_dates),
+                    )
                 else:
                     option_stack = parse_fun(fh, indextype, expiration_dates)
                     fwd_index = pd.DataFrame(