diff options
Diffstat (limited to 'python/parse_gs.py')
| -rw-r--r-- | python/parse_gs.py | 32 |
1 files changed, 21 insertions, 11 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py index ccb56c14..4d78f744 100644 --- a/python/parse_gs.py +++ b/python/parse_gs.py @@ -2,26 +2,30 @@ import pandas as pd import pdb import re import os +import pdb -for f in os.listdir("quotes"): - with open(os.abspath(f)) as fh: +os.chdir("quotes") +for f in os.listdir("."): + with open(os.path.abspath(f), "rb") as fh: flag = False masterdf = {} for line in fh: - line = line.lstrip() + line = line.decode('utf-8', 'ignore') + line = line.rstrip() m = re.search("(IG|HY)24 5y SWAPTION UPDATE - Ref\D+(.+)$", line) if m: indextype = m.groups()[0] if indextype=='HY': - refprice, refspread = re.match("([^\t]+)\t+\(.*\)", m.groups()[1]).groups() + refprice, refspread = map(float, + re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[1]).groups()) else: - refspread = m.groups()[1] + refspread = float(m.groups()[1]) continue if line.startswith("At"): quotedate = pd.to_datetime(line[4:]) continue if line.startswith("Expiry"): - m = re.match("Expiry (\d{2}\w{3}\d{2}) \(([^\t]+) ([^t]+)\)", line) + m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) if m: date, fwprice, fwspread = m.groups() date = pd.datetime.strptime(date, '%d%b%y') @@ -42,11 +46,16 @@ for f in os.listdir("quotes"): r.append(vals) continue else: - df = pd.DataFrame.from_records(r, - columns = ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol', - 'VolChg', 'VolBpd', 'Tail']) - df['forward'] = float(fwprice) - df['spread'] = float(fwspread) + if indextype=='HY': + cols = ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol', + 'VolChg', 'VolBpd', 'Tail'] + else: + cols = ['Stk', 'Pay', 'Delta', 'Rec', 'Vol', + 'VolChg', 'VolBpd', 'Tail'] + df = pd.DataFrame.from_records(r, columns = cols) + df['refspread'] = refspread + if indextype=='HY': + df['refprice'] = refprice df[['PayBid', 'PayOffer']] = df.Pay.str.split('/').apply(pd.Series) df[['RecBid', 'RecOffer']] = df.Rec.str.split('/').apply(pd.Series) df.drop(['Pay', 'Rec'], axis=1, inplace=True) @@ -57,3 +66,4 @@ for f in os.listdir("quotes"): r = [] continue masterdf = pd.concat(masterdf) + pdb.set_trace() |
