2 files changed, 89 insertions, 35 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index f9ad9d9c..79d54a81 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -4,22 +4,26 @@ from pathlib import Path
 import pdb
 from download_emails import update_emails
 import datetime
+import sys
 
 def makedf(r, indextype):
     if indextype=='IG':
-        cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
-                'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
+        cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid',
+                'pay_offer', 'delta_pay', 'vol', 'gamma']
     else:
-        cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
-                'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']
+        cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid',
+                'pay_offer', 'delta_pay', 'vol', 'price_vol', 'gamma']
     df = pd.DataFrame.from_records(r, columns = cols)
-    for col in ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']:
+    for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma']:
         if col in df:
             df[col] = df[col].str.strip("%").astype('float')/100
     for k in df:
-        if df.dtypes[k]=='object':
-            df[k] = pd.to_numeric(df[k])
-    df.set_index('Strike', inplace=True)
+        if df.dtypes[k] == 'object':
+            try:
+                df[k] = pd.to_numeric(df[k])
+            except ValueError:
+                pdb.set_trace()
+    df.set_index('strike', inplace=True)
     return df
 
 def parse_email(email_path):
@@ -59,7 +63,7 @@ def parse_email(email_path):
                 try:
                     d = m.groupdict()
                     d['quotedate'] = quotedate
-                    d['indextype'] = indextype
+                    d['index'] = indextype
                     d['series'] = series
                     d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y')
                 except AttributeError:
@@ -92,23 +96,45 @@ def parse_email(email_path):
             raise RuntimeError("empty email: {0}".format(email_path.name))
 
 if __name__=="__main__":
+    import pickle
     update_emails()
     emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
     swaption_stack = {}
     index_data = pd.DataFrame()
+    with open(".pickle", "rb") as fh:
+        already_uploaded = pickle.load(fh)
     for f in emails:
+        if f.name in already_uploaded:
+            continue
+        else:
+            already_uploaded.add(f.name)
         try:
             key, option_stack, fwd_index = parse_email(f)
         except RuntimeError as e:
             print(e)
         else:
-            swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'Strike'])
+            swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike'])
             index_data = index_data.append(fwd_index)
+    if index_data.empty:
+        sys.exit()
     for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
         index_data[col] = index_data[col].astype('float')
-    index_data['indextype'] = index_data['indextype'].astype('category')
-    swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'indextype', 'series'])
-    with pd.HDFStore('../../data/swaptions.hdf', mode = 'w', complevel=4,
-                     complib='blosc', fletcher32=True) as swaptions:
-        swaptions.append('swaptions', swaption_stack)
-        swaptions.append('index_data', index_data)
+    index_data['index'] = index_data['index'].astype('category')
+
+    swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series'])
+    import feather
+    feather.write_dataframe(swaption_stack, '../../data/swaptions.fth')
+    feather.write_dataframe(index_data, '../../data/index_data.fth')
+
+    swaption_stack = swaption_stack.drop_duplicates()
+    swaption_stack = swaption_stack.reset_index()
+    index_data = index_data.drop_duplicates()
+    from db import dbengine
+    serenitasdb  = dbengine('serenitasdb')
+    from sqlalchemy import MetaData, Table
+    meta = MetaData(bind=serenitasdb)
+    swaption_quotes = Table('swaption_quotes', meta, autoload=True)
+    ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
+    index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append')
+    with open(".pickle", "wb") as fh:
+        pickle.dump(already_uploaded, fh)
diff --git a/python/parse_gs.py b/python/parse_gs.py
index 999d3e95..7a0ec006 100644
--- a/python/parse_gs.py
+++ b/python/parse_gs.py
@@ -2,24 +2,27 @@ import pandas as pd
 import pdb
 import re
 import os
-import pdb
 
-os.chdir("quotes")
-for f in os.listdir("."):
-    with open(os.path.abspath(f), "rb") as fh:
+data_dir = "/home/share/guillaume/swaptions"
+all_df = {}
+fwd_index = []
+for f in os.listdir(data_dir):
+    print(f)
+    with open(os.path.join(data_dir, f), "rb") as fh:
         flag = False
         masterdf = {}
         for line in fh:
             line = line.decode('utf-8', 'ignore')
             line = line.rstrip()
-            m = re.search("(IG|HY)24 5y SWAPTION UPDATE - Ref\D+(.+)$", line)
+            m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line)
             if m:
                 indextype = m.groups()[0]
-                if indextype=='HY':
+                series = int(m.groups()[1])
+                if indextype == 'HY':
                     refprice, refspread = map(float,
-                                              re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[1]).groups())
+                                              re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups())
                 else:
-                    refspread = float(m.groups()[1])
+                    refspread = float(m.groups()[2])
                 continue
             if line.startswith("At"):
                 quotedate = pd.to_datetime(line[4:])
@@ -28,7 +31,7 @@ for f in os.listdir("."):
                 m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
                 if m:
                     date, fwprice, fwspread = m.groups()
-                    date = pd.datetime.strptime(date, '%d%b%y')
+                    date = pd.to_datetime(date, format='%d%b%y')
                 continue
             if line.startswith("Stk"):
                 flag = True
@@ -47,23 +50,48 @@ for f in os.listdir("."):
                     continue
                 else:
                     if indextype=='HY':
-                        cols = ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol',
+                        cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
                                 'VolChg', 'VolBpd', 'Tail']
                     else:
-                        cols = ['Stk', 'Pay', 'Delta', 'Rec', 'Vol',
+                        cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
                                 'VolChg', 'VolBpd', 'Tail']
                     df = pd.DataFrame.from_records(r, columns = cols)
-                    df['refspread'] = refspread
-                    if indextype=='HY':
-                        df['refprice'] = refprice
+
                     df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
                     df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
                     df.drop(['Pay', 'Rec'], axis=1, inplace=True)
-                    df = df.convert_objects(convert_numeric=True)
-                    df.set_index('Stk', inplace=True)
-                    masterdf[date]=df
+                    for col in df:
+                        df[col] = pd.to_numeric(df[col], errors = 'coerce')
+                    df.set_index('Strike', inplace=True)
+                    d = {'quotedate': quotedate,
+                         'expiry': date,
+                         'indextype': indextype,
+                         'series': series,
+                         'ref': refspread if indextype =="IG" else refprice}
+                    if indextype == "IG":
+                        d['fwdspread'] = float(fwspread)
+                    else:
+                        d['fwdprice'] = float(fwprice)
+                    fwd_index.append(d)
+
+                    masterdf[date] = df
                     flag = False
                     r = []
                     continue
-        masterdf = pd.concat(masterdf)
-        pdb.set_trace()
+    all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry'])
+all_df = pd.concat(all_df, names = ['quotedate', 'indextype', 'series'])
+all_df['DeltaPay'] = - all_df['DeltaPay']/100
+index_df = pd.DataFrame.from_records(fwd_index)
+# with pd.HDFStore('../../data/swaptions_gs.hdf', mode = 'w', complevel=4,
+#                  complib='blosc', fletcher32=True) as swaptions:
+#     swaptions.append('swaptions', all_df)
+#     swaptions.append('index_data', index_df)
+all_df = all_df.rename(columns={'Strike':'strike',
+                                'Vol': 'vol',
+                                'PayOffer': 'pay_offer',
+                                'PayBid': 'pay_bid',
+                                'RecOffer': 'rec_offer'
+                                'RecBid': 'rec_bid',
+                                'Tail': 'tail',
+                                'DeltaPay': 'delta_pay'})
+})