about | summary | refs | log | tree | commit | diff | stats
path: root/python/parse_gs.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_gs.py')
-rw-r--r-- python/parse_gs.py 66
1 files changed, 47 insertions, 19 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py
index 999d3e95..7a0ec006 100644
--- a/python/parse_gs.py
+++ b/python/parse_gs.py
@@ -2,24 +2,27 @@ import pandas as pd
import pdb
import re
import os
-import pdb
-os.chdir("quotes")
-for f in os.listdir("."):
- with open(os.path.abspath(f), "rb") as fh:
+data_dir = "/home/share/guillaume/swaptions"
+all_df = {}
+fwd_index = []
+for f in os.listdir(data_dir):
+ print(f)
+ with open(os.path.join(data_dir, f), "rb") as fh:
flag = False
masterdf = {}
for line in fh:
line = line.decode('utf-8', 'ignore')
line = line.rstrip()
- m = re.search("(IG|HY)24 5y SWAPTION UPDATE - Ref\D+(.+)$", line)
+ m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line)
if m:
indextype = m.groups()[0]
- if indextype=='HY':
+ series = int(m.groups()[1])
+ if indextype == 'HY':
refprice, refspread = map(float,
- re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[1]).groups())
+ re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups())
else:
- refspread = float(m.groups()[1])
+ refspread = float(m.groups()[2])
continue
if line.startswith("At"):
quotedate = pd.to_datetime(line[4:])
@@ -28,7 +31,7 @@ for f in os.listdir("."):
m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
if m:
date, fwprice, fwspread = m.groups()
- date = pd.datetime.strptime(date, '%d%b%y')
+ date = pd.to_datetime(date, format='%d%b%y')
continue
if line.startswith("Stk"):
flag = True
@@ -47,23 +50,48 @@ for f in os.listdir("."):
continue
else:
if indextype=='HY':
- cols = ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol',
+ cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
'VolChg', 'VolBpd', 'Tail']
else:
- cols = ['Stk', 'Pay', 'Delta', 'Rec', 'Vol',
+ cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
'VolChg', 'VolBpd', 'Tail']
df = pd.DataFrame.from_records(r, columns = cols)
- df['refspread'] = refspread
- if indextype=='HY':
- df['refprice'] = refprice
+
df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
df.drop(['Pay', 'Rec'], axis=1, inplace=True)
- df = df.convert_objects(convert_numeric=True)
- df.set_index('Stk', inplace=True)
- masterdf[date]=df
+ for col in df:
+ df[col] = pd.to_numeric(df[col], errors = 'coerce')
+ df.set_index('Strike', inplace=True)
+ d = {'quotedate': quotedate,
+ 'expiry': date,
+ 'indextype': indextype,
+ 'series': series,
+ 'ref': refspread if indextype =="IG" else refprice}
+ if indextype == "IG":
+ d['fwdspread'] = float(fwspread)
+ else:
+ d['fwdprice'] = float(fwprice)
+ fwd_index.append(d)
+
+ masterdf[date] = df
flag = False
r = []
continue
- masterdf = pd.concat(masterdf)
- pdb.set_trace()
+ all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry'])
+all_df = pd.concat(all_df, names=['quotedate', 'indextype', 'series'])  # stack every parsed quote under one MultiIndex
+all_df['DeltaPay'] = -all_df['DeltaPay'] / 100  # sign-flipped and scaled by 1/100 (presumably percent -> fraction; confirm)
+index_df = pd.DataFrame.from_records(fwd_index)  # one row per (quote, expiry) with ref/forward levels
+# with pd.HDFStore('../../data/swaptions_gs.hdf', mode = 'w', complevel=4,
+#                  complib='blosc', fletcher32=True) as swaptions:
+#     swaptions.append('swaptions', all_df)
+#     swaptions.append('index_data', index_df)
+# snake_case the columns; NOTE(review): 'Strike' is an index level after set_index, so its entry is a no-op here.
+all_df = all_df.rename(columns={'Strike': 'strike',
+                                'Vol': 'vol',
+                                'PayOffer': 'pay_offer',
+                                'PayBid': 'pay_bid',
+                                'RecOffer': 'rec_offer',
+                                'RecBid': 'rec_bid',
+                                'Tail': 'tail',
+                                'DeltaPay': 'delta_pay'})