Diffstat (limited to 'python')
-rw-r--r--  python/parse_emails.py  96
1 file changed, 70 insertions, 26 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 5c5fb8bd..c31bf1f7 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -40,13 +40,13 @@ def parse_quotedate(fh, date_received):
     for line in fh:
         line = line.rstrip()
         if line.startswith("At"):
-            for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']:
+            for p in ['%m/%d/%y %H:%M:%S', '%b %d %Y %H:%M:%S', '%m/%d %H:%M:%S']:
                 try:
                     quotedate = pd.to_datetime(line, format=p, exact=False)
                 except ValueError:
                     continue
                 else:
-                    if quotedate.year == 1900:
+                    if quotedate.year == 1900: # p='%m/%d %H:%M:%S'
                         quotedate = quotedate.replace(year=date_received.year)
                     break
             else:
@@ -132,6 +132,27 @@ def parse_ms_block(fh, indextype):
         r.append(vals)
     return makedf(r, indextype, "MS")
 
+def parse_nomura_block(fh, indextype):
+    next(fh) ## skip header
+    r = []
+    for line in fh:
+        line = line.rstrip()
+        if line == "":
+            break
+        strike, receiver, payer, vol, _ = line.split("|", 4)
+        strike = strike.strip()
+        pay, pay_delta = payer.strip().split()
+        rec, rec_delta = receiver.strip().split()
+        pay_bid, pay_offer = pay.split("/")
+        rec_bid, rec_offer = rec.split("/")
+        vol = vol.strip()
+        vals = [strike, rec_bid, rec_offer, rec_delta,
+                pay_bid, pay_offer, pay_delta, vol]
+        if indextype == "HY": # we don't have price vol
+            vals.append(None)
+        r.append(vals)
+    return makedf(r, indextype, "NOM")
+
 def parse_ms(fh, indextype):
     option_stack = {}
     for line in fh:
@@ -142,33 +163,46 @@ def parse_ms(fh, indextype):
             option_stack[expiry] = parse_ms_block(fh, indextype)
     return option_stack
 
-subject_BAML = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
-subject_MS = re.compile("\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^\d]*([\d.]+)")
+def parse_nomura(fh, indextype):
+    option_stack = {}
+    for line in fh:
+        line = line.rstrip()
+        if "EXPIRY" in line:
+            expiry = line.split(" ")[0]
+            expiry = pd.to_datetime(expiry, format="%d-%b-%y")
+            option_stack[expiry] = parse_nomura_block(fh, indextype)
+    return option_stack
+
+subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
+subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
+subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
 
 def parse_email(email):
     with open(email.path, "rt") as fh:
         date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000)
         subject = next(fh)
-        m = subject_BAML.match(subject)
-        if m:
-            indextype, series = m.groups()
-            series = int(series)
-            quotedate = parse_quotedate(fh, date_received)
-            return (quotedate, indextype, series), parse_baml(fh, indextype, series, quotedate)
-        m = subject_MS.match(subject)
-        if m:
-            indextype, series, ref = m.groups()
-            series = int(series)
-            ref = float(ref)
-            quotedate = parse_quotedate(fh, date_received)
-            option_stack = parse_ms(fh, indextype)
-            fwd_index = pd.DataFrame({'quotedate': quotedate,
-                                      'ref': ref,
-                                      'index': indextype,
-                                      'series': series,
-                                      'expiry': list(option_stack.keys())})
-            fwd_index.set_index('quotedate', inplace = True)
-            return (quotedate, indextype, series), (option_stack, fwd_index)
+        for source in ['BAML', 'MS', 'NOMURA']:
+            m = globals()['subject_'+source.lower()].match(subject)
+            if m:
+                if source == 'BAML':
+                    indextype, series = m.groups()
+                else:
+                    indextype, series, ref = m.groups()
+                    ref = float(ref)
+                series = int(series)
+                quotedate = parse_quotedate(fh, date_received)
+
+                if source == 'BAML':
+                    return (quotedate, indextype, series), parse_baml(fh, indextype, series, quotedate)
+                else:
+                    option_stack = globals()['parse_'+source.lower()](fh, indextype)
+                    fwd_index = pd.DataFrame({'quotedate': quotedate,
+                                              'ref': ref,
+                                              'index': indextype,
+                                              'series': series,
+                                              'expiry': list(option_stack.keys())})
+                    fwd_index.set_index('quotedate', inplace = True)
+                    return (quotedate, indextype, series), (option_stack, fwd_index)
     raise RuntimeError("can't parse subject line: {0} for email {1}".format(
         subject, email.name))
 
@@ -180,7 +214,9 @@ def write_todb(swaption_stack, index_data):
     psycopg2.extensions.register_adapter(float, nan_to_null)
     meta = MetaData(bind=serenitasdb)
     swaption_quotes = Table('swaption_quotes', meta, autoload=True)
-    ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
+    for r in swaption_stack.to_dict(orient='records'):
+        serenitasdb.execute(swaption_quotes.insert(), r)
+    #ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
     index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False)
 
 def get_email_list(date):
@@ -197,6 +233,13 @@
     df = df.reset_index().set_index('quotedate')
     return df.loc[date,'index'].tolist()
 
+def pickle_drop_date(date):
+    with open(".pickle", "rb") as fh:
+        already_uploaded = pickle.load(fh)
+    newdict = {k: v for k, v in already_uploaded.items() if v.date() != date}
+    with open(".pickle", "wb") as fh:
+        pickle.dump(newdict, fh)
+
 if __name__=="__main__":
     update_emails()
     data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions")
@@ -223,7 +266,8 @@ if __name__=="__main__":
     if index_data.empty:
         sys.exit()
     for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
-        index_data[col] = index_data[col].astype('float')
+        if col in index_data:
+            index_data[col] = index_data[col].astype('float')
     index_data['index'] = index_data['index'].astype('category')
 
     swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series'])
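As a sanity check on the new Nomura block parser, here is a minimal, self-contained sketch of the unpacking steps. The sample quote line is hypothetical (the actual broker layout is not part of this commit); only the splitting steps mirror parse_nomura_block():

    # Hypothetical pipe-delimited Nomura quote line, assumed to be laid out as
    # strike | receiver bid/offer delta | payer bid/offer delta | vol | <rest>
    line = "100 | 30/36 25 | 41/47 30 | 42.5 |"

    strike, receiver, payer, vol, _ = line.split("|", 4)
    rec, rec_delta = receiver.strip().split()
    pay, pay_delta = payer.strip().split()
    rec_bid, rec_offer = rec.split("/")
    pay_bid, pay_offer = pay.split("/")
    print(strike.strip(), rec_bid, rec_offer, rec_delta,
          pay_bid, pay_offer, pay_delta, vol.strip())
    # -> 100 30 36 25 41 47 30 42.5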

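The subject-line dispatch in parse_email() now selects the parser by regex. The patterns below are copied verbatim from the patch; the three sample subjects are invented purely to illustrate which capture groups each source yields:

    import re

    subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
    subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
    subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")

    samples = {  # invented subject lines, for illustration only
        'BAML': "IG25 payer/receiver runs",
        'MS': "$$ MS CDX OPTIONS: HY25 - REF 107.25",
        'NOMURA': "Fwd:CDX IG25 OPTIONS - REF: 52.5",
    }
    for source, subject in samples.items():
        m = globals()['subject_' + source.lower()].match(subject)
        print(source, m.groups() if m else None)
    # BAML ('IG', '25')
    # MS ('HY', '25', '107.25')
    # NOMURA ('IG', '25', '52.5')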