diff options
Diffstat (limited to 'python/parse_gs.py')
| -rw-r--r-- | python/parse_gs.py | 66 |
1 file changed, 47 insertions, 19 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py index 999d3e95..7a0ec006 100644 --- a/python/parse_gs.py +++ b/python/parse_gs.py @@ -2,24 +2,27 @@ import pandas as pd import pdb import re import os -import pdb -os.chdir("quotes") -for f in os.listdir("."): - with open(os.path.abspath(f), "rb") as fh: +data_dir = "/home/share/guillaume/swaptions" +all_df = {} +fwd_index = [] +for f in os.listdir(data_dir): + print(f) + with open(os.path.join(data_dir, f), "rb") as fh: flag = False masterdf = {} for line in fh: line = line.decode('utf-8', 'ignore') line = line.rstrip() - m = re.search("(IG|HY)24 5y SWAPTION UPDATE - Ref\D+(.+)$", line) + m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line) if m: indextype = m.groups()[0] - if indextype=='HY': + series = int(m.groups()[1]) + if indextype == 'HY': refprice, refspread = map(float, - re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[1]).groups()) + re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups()) else: - refspread = float(m.groups()[1]) + refspread = float(m.groups()[2]) continue if line.startswith("At"): quotedate = pd.to_datetime(line[4:]) @@ -28,7 +31,7 @@ for f in os.listdir("."): m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) if m: date, fwprice, fwspread = m.groups() - date = pd.datetime.strptime(date, '%d%b%y') + date = pd.to_datetime(date, format='%d%b%y') continue if line.startswith("Stk"): flag = True @@ -47,23 +50,48 @@ for f in os.listdir("."): continue else: if indextype=='HY': - cols = ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol', + cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] else: - cols = ['Stk', 'Pay', 'Delta', 'Rec', 'Vol', + cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] df = pd.DataFrame.from_records(r, columns = cols) - df['refspread'] = refspread - if indextype=='HY': - df['refprice'] = refprice + df[['PayBid', 'PayOffer']] = 
df.Pay.str.split('/', expand=True) df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) df.drop(['Pay', 'Rec'], axis=1, inplace=True) - df = df.convert_objects(convert_numeric=True) - df.set_index('Stk', inplace=True) - masterdf[date]=df + for col in df: + df[col] = pd.to_numeric(df[col], errors = 'coerce') + df.set_index('Strike', inplace=True) + d = {'quotedate': quotedate, + 'expiry': date, + 'indextype': indextype, + 'series': series, + 'ref': refspread if indextype =="IG" else refprice} + if indextype == "IG": + d['fwdspread'] = float(fwspread) + else: + d['fwdprice'] = float(fwprice) + fwd_index.append(d) + + masterdf[date] = df flag = False r = [] continue - masterdf = pd.concat(masterdf) - pdb.set_trace() + all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry']) +all_df = pd.concat(all_df, names = ['quotedate', 'indextype', 'series']) +all_df['DeltaPay'] = - all_df['DeltaPay']/100 +index_df = pd.DataFrame.from_records(fwd_index) +# with pd.HDFStore('../../data/swaptions_gs.hdf', mode = 'w', complevel=4, +# complib='blosc', fletcher32=True) as swaptions: +# swaptions.append('swaptions', all_df) +# swaptions.append('index_data', index_df) +all_df = all_df.rename(columns={'Strike':'strike', + 'Vol': 'vol', + 'PayOffer': 'pay_offer', + 'PayBid': 'pay_bid', + 'RecOffer': 'rec_offer' + 'RecBid': 'rec_bid', + 'Tail': 'tail', + 'DeltaPay': 'delta_pay'}) +}) |
