1 files changed, 85 insertions, 69 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py
index b58065ee..b0aad079 100644
--- a/python/parse_gs.py
+++ b/python/parse_gs.py
@@ -3,81 +3,83 @@ import pdb
 import re
 import os
 
-data_dir = "/home/share/guillaume/IG swaptions"
+data_dir = "/home/share/CorpCDOs/data/swaptions/GS swaptions"
 all_df = {}
 fwd_index = []
-for f in os.listdir(data_dir):
-    with open(os.path.join(data_dir, f), "rb") as fh:
-        flag = False
-        masterdf = {}
-        for line in fh:
-            line = line.decode('utf-8', 'ignore')
-            line = line.rstrip()
-            m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line)
-            if m:
-                indextype = m.groups()[0]
-                series = int(m.groups()[1])
-                if indextype == 'HY':
-                    refprice, refspread = map(float,
-                                              re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups())
-                else:
-                    refspread = float(m.groups()[2])
-                continue
-            if line.startswith("At"):
-                quotedate = pd.to_datetime(line[4:])
-                continue
-            if line.startswith("Expiry"):
-                m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
+for index in ["IG", "HY"]:
+    full_path = os.path.join(data_dir, index + " swaptions")
+    for f in os.listdir(os.path.join(data_dir, index + " swaptions")):
+        with open(os.path.join(full_path, f), "rb") as fh:
+            flag = False
+            masterdf = {}
+            for line in fh:
+                line = line.decode('utf-8', 'ignore')
+                line = line.rstrip()
+                m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line)
                 if m:
-                    date, fwprice, fwspread = m.groups()
-                    date = pd.to_datetime(date, format='%d%b%y')
-                continue
-            if line.startswith("Stk"):
-                flag = True
-                r = []
-                continue
-            if flag:
-                if line:
-                    vals = re.sub(" +", " ", line).split(" ")
-                    if indextype=='HY':
-                        vals.pop(2)
-                        vals.pop(9)
+                    indextype = m.groups()[0]
+                    series = int(m.groups()[1])
+                    if indextype == 'HY':
+                        refprice, refspread = map(float,
+                                                  re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups())
                     else:
-                        vals.pop(1)
-                        vals.pop(8)
-                    r.append(vals)
+                        refspread = float(m.groups()[2])
                     continue
-                else:
-                    if indextype=='HY':
-                        cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
-                                'VolChg', 'VolBpd', 'Tail']
+                if line.startswith("At"):
+                    quotedate = pd.to_datetime(line[4:])
+                    continue
+                if line.startswith("Expiry"):
+                    m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
+                    if m:
+                        date, fwprice, fwspread = m.groups()
+                        date = pd.to_datetime(date, format='%d%b%y')
+                    continue
+                if line.startswith("Stk"):
+                    flag = True
+                    r = []
+                    continue
+                if flag:
+                    if line:
+                        vals = re.sub(" +", " ", line).split(" ")
+                        if indextype=='HY':
+                            vals.pop(2)
+                            vals.pop(9)
+                        else:
+                            vals.pop(1)
+                            vals.pop(8)
+                        r.append(vals)
+                        continue
                     else:
-                        cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
-                                'VolChg', 'VolBpd', 'Tail']
-                    df = pd.DataFrame.from_records(r, columns = cols)
+                        if indextype=='HY':
+                            cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
+                                    'VolChg', 'VolBpd', 'Tail']
+                        else:
+                            cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
+                                    'VolChg', 'VolBpd', 'Tail']
+                        df = pd.DataFrame.from_records(r, columns = cols)
 
-                    df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
-                    df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
-                    df.drop(['Pay', 'Rec'], axis=1, inplace=True)
-                    for col in df:
-                        df[col] = pd.to_numeric(df[col], errors = 'coerce')
-                    df.set_index('Strike', inplace=True)
-                    d = {'quotedate': quotedate,
-                         'expiry': date,
-                         'index': indextype,
-                         'series': series,
-                         'ref': refspread if indextype =="IG" else refprice}
-                    if indextype == "IG":
-                        d['fwdspread'] = float(fwspread)
-                    else:
-                        d['fwdprice'] = float(fwprice)
-                    fwd_index.append(d)
+                        df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
+                        df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
+                        df.drop(['Pay', 'Rec'], axis=1, inplace=True)
+                        for col in df:
+                            df[col] = pd.to_numeric(df[col], errors = 'coerce')
+                        df.set_index('Strike', inplace=True)
+                        d = {'quotedate': quotedate,
+                             'expiry': date,
+                             'index': indextype,
+                             'series': series,
+                             'ref': refspread if indextype =="IG" else refprice}
+                        if indextype == "IG":
+                            d['fwdspread'] = float(fwspread)
+                        else:
+                            d['fwdprice'] = float(fwprice)
+                        fwd_index.append(d)
 
-                    masterdf[date] = df
-                    flag = False
-                    r = []
-                    continue
-    all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry'])
+                        masterdf[date] = df
+                        flag = False
+                        r = []
+                        continue
+        all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry'])
 all_df = pd.concat(all_df, names = ['quotedate', 'index', 'series'])
 all_df['DeltaPay'] = - all_df['DeltaPay']/100
 all_df['Vol'] /= 100
@@ -96,8 +98,22 @@ del all_df['VolBpd'], all_df['VolChg']
 if 'Sprd' in all_df:
     del all_df['Sprd']
 all_df['quote_source'] = 'GS'
+index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
+
+##insert quotes
 from db import dbengine
 serenitasdb = dbengine('serenitasdb')
+conn = serenitasdb.raw_connection()
+## first delete quotes
+with conn.cursor() as c:
+    c.execute("DELETE FROM serenitas_quotes WHERE quote_source='GS'")
+conn.commit()
 all_df.to_sql('swaption_quotes', serenitasdb, if_exists='append', index=False)
-index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
-index_df.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False)
+
+sqlstr = "INSERT INTO swaption_ref_quotes(quotedate, index, series, expiry, ref, fwdprice, fwdspread) "\
+         "VALUES(%(quotedate)s, %(index)s, %(series)s, %(expiry)s, %(ref)s, %(fwdprice)s, %(fwdspread)s) " \
+         "ON CONFLICT DO NOTHING"
+with conn.cursor() as c:
+    c.executemany(sqlstr, index_df.to_dict(orient='records'))
+conn.commit()
+conn.close()