diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_gs.py | 154 |
1 files changed, 85 insertions, 69 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py index b58065ee..b0aad079 100644 --- a/python/parse_gs.py +++ b/python/parse_gs.py @@ -3,81 +3,83 @@ import pdb import re import os -data_dir = "/home/share/guillaume/IG swaptions" +data_dir = "/home/share/CorpCDOs/data/swaptions/GS swaptions" all_df = {} fwd_index = [] -for f in os.listdir(data_dir): - with open(os.path.join(data_dir, f), "rb") as fh: - flag = False - masterdf = {} - for line in fh: - line = line.decode('utf-8', 'ignore') - line = line.rstrip() - m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line) - if m: - indextype = m.groups()[0] - series = int(m.groups()[1]) - if indextype == 'HY': - refprice, refspread = map(float, - re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups()) - else: - refspread = float(m.groups()[2]) - continue - if line.startswith("At"): - quotedate = pd.to_datetime(line[4:]) - continue - if line.startswith("Expiry"): - m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) +for index in ["IG", "HY"]: + full_path = os.path.join(data_dir, index + " swaptions") + for f in os.listdir(os.path.join(data_dir, index + " swaptions")): + with open(os.path.join(full_path, f), "rb") as fh: + flag = False + masterdf = {} + for line in fh: + line = line.decode('utf-8', 'ignore') + line = line.rstrip() + m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line) if m: - date, fwprice, fwspread = m.groups() - date = pd.to_datetime(date, format='%d%b%y') - continue - if line.startswith("Stk"): - flag = True - r = [] - continue - if flag: - if line: - vals = re.sub(" +", " ", line).split(" ") - if indextype=='HY': - vals.pop(2) - vals.pop(9) + indextype = m.groups()[0] + series = int(m.groups()[1]) + if indextype == 'HY': + refprice, refspread = map(float, + re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups()) else: - vals.pop(1) - vals.pop(8) - r.append(vals) + refspread = float(m.groups()[2]) continue - else: - if indextype=='HY': - cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', - 'VolChg', 'VolBpd', 'Tail'] + if line.startswith("At"): + quotedate = pd.to_datetime(line[4:]) + continue + if line.startswith("Expiry"): + m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) + if m: + date, fwprice, fwspread = m.groups() + date = pd.to_datetime(date, format='%d%b%y') + continue + if line.startswith("Stk"): + flag = True + r = [] + continue + if flag: + if line: + vals = re.sub(" +", " ", line).split(" ") + if indextype=='HY': + vals.pop(2) + vals.pop(9) + else: + vals.pop(1) + vals.pop(8) + r.append(vals) + continue else: - cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', - 'VolChg', 'VolBpd', 'Tail'] - df = pd.DataFrame.from_records(r, columns = cols) + if indextype=='HY': + cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', + 'VolChg', 'VolBpd', 'Tail'] + else: + cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', + 'VolChg', 'VolBpd', 'Tail'] + df = pd.DataFrame.from_records(r, columns = cols) - df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) - df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) - df.drop(['Pay', 'Rec'], axis=1, inplace=True) - for col in df: - df[col] = pd.to_numeric(df[col], errors = 'coerce') - df.set_index('Strike', inplace=True) - d = {'quotedate': quotedate, - 'expiry': date, - 'index': indextype, - 'series': series, - 'ref': refspread if indextype =="IG" else refprice} - if indextype == "IG": - d['fwdspread'] = float(fwspread) - else: - d['fwdprice'] = float(fwprice) - fwd_index.append(d) + df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) + df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) + df.drop(['Pay', 'Rec'], axis=1, inplace=True) + for col in df: + df[col] = pd.to_numeric(df[col], errors = 'coerce') + df.set_index('Strike', inplace=True) + d = {'quotedate': quotedate, + 'expiry': date, + 'index': indextype, + 'series': series, + 'ref': refspread if indextype =="IG" else refprice} + if indextype == "IG": + d['fwdspread'] = float(fwspread) + else: + d['fwdprice'] = float(fwprice) + fwd_index.append(d) - masterdf[date] = df - flag = False - r = [] - continue - all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry']) + masterdf[date] = df + flag = False + r = [] + continue + all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry']) all_df = pd.concat(all_df, names = ['quotedate', 'index', 'series']) all_df['DeltaPay'] = - all_df['DeltaPay']/100 all_df['Vol'] /= 100 @@ -96,8 +98,22 @@ del all_df['VolBpd'], all_df['VolChg'] if 'Sprd' in all_df: del all_df['Sprd'] all_df['quote_source'] = 'GS' +index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) + +##insert quotes from db import dbengine serenitasdb = dbengine('serenitasdb') +conn = serenitasdb.raw_connection() +## first delete quotes +with conn.cursor() as c: + c.execute("DELETE FROM serenitas_quotes WHERE quote_source='GS'") +conn.commit() all_df.to_sql('swaption_quotes', serenitasdb, if_exists='append', index=False) -index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) -index_df.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False) + +sqlstr = "INSERT INTO swaption_ref_quotes(quotedate, index, series, expiry, ref, fwdprice, fwdspread) "\ + "VALUES(%(quotedate)s, %(index)s, %(series)s, %(expiry)s, %(ref)s, %(fwdprice)s, %(fwdspread)s) " \ + "ON CONFLICT DO NOTHING" +with conn.cursor() as c: + c.executemany(sqlstr, index_df.to_dict(orient='records')) +conn.commit() +conn.close() |
