diff options
| -rw-r--r-- | python/parse_emails.py | 58 | ||||
| -rw-r--r-- | python/parse_gs.py | 66 |
2 files changed, 89 insertions, 35 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index f9ad9d9c..79d54a81 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -4,22 +4,26 @@ from pathlib import Path import pdb from download_emails import update_emails import datetime +import sys def makedf(r, indextype): if indextype=='IG': - cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid', - 'PayOffer', 'DeltaPay', 'Vol', 'Gamma'] + cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid', + 'pay_offer', 'delta_pay', 'vol', 'gamma'] else: - cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid', - 'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma'] + cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid', + 'pay_offer', 'delta_pay', 'vol', 'price_vol', 'gamma'] df = pd.DataFrame.from_records(r, columns = cols) - for col in ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']: + for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma']: if col in df: df[col] = df[col].str.strip("%").astype('float')/100 for k in df: - if df.dtypes[k]=='object': - df[k] = pd.to_numeric(df[k]) - df.set_index('Strike', inplace=True) + if df.dtypes[k] == 'object': + try: + df[k] = pd.to_numeric(df[k]) + except ValueError: + pdb.set_trace() + df.set_index('strike', inplace=True) return df def parse_email(email_path): @@ -59,7 +63,7 @@ def parse_email(email_path): try: d = m.groupdict() d['quotedate'] = quotedate - d['indextype'] = indextype + d['index'] = indextype d['series'] = series d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y') except AttributeError: @@ -92,23 +96,45 @@ def parse_email(email_path): raise RuntimeError("empty email: {0}".format(email_path.name)) if __name__=="__main__": + import pickle update_emails() emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()] swaption_stack = {} index_data = pd.DataFrame() + with open(".pickle", "rb") as fh: + already_uploaded = pickle.load(fh) for f in emails: + if f.name in already_uploaded: + continue + else: + already_uploaded.add(f.name) try: key, option_stack, fwd_index = parse_email(f) except RuntimeError as e: print(e) else: - swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'Strike']) + swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike']) index_data = index_data.append(fwd_index) + if index_data.empty: + sys.exit() for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']: index_data[col] = index_data[col].astype('float') - index_data['indextype'] = index_data['indextype'].astype('category') - swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'indextype', 'series']) - with pd.HDFStore('../../data/swaptions.hdf', mode = 'w', complevel=4, - complib='blosc', fletcher32=True) as swaptions: - swaptions.append('swaptions', swaption_stack) - swaptions.append('index_data', index_data) + index_data['index'] = index_data['index'].astype('category') + + swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series']) + import feather + feather.write_dataframe(swaption_stack, '../../data/swaptions.fth') + feather.write_dataframe(index_data, '../../data/index_data.fth') + + swaption_stack = swaption_stack.drop_duplicates() + swaption_stack = swaption_stack.reset_index() + index_data = index_data.drop_duplicates() + from db import dbengine + serenitasdb = dbengine('serenitasdb') + from sqlalchemy import MetaData, Table + meta = MetaData(bind=serenitasdb) + swaption_quotes = Table('swaption_quotes', meta, autoload=True) + ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute() + index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append') + with open(".pickle", "wb") as fh: + pickle.dump(already_uploaded, fh) diff --git a/python/parse_gs.py b/python/parse_gs.py index 999d3e95..7a0ec006 100644 --- a/python/parse_gs.py +++ b/python/parse_gs.py @@ -2,24 +2,27 @@ import pandas as pd import pdb import re import os -import pdb -os.chdir("quotes") -for f in os.listdir("."): - with open(os.path.abspath(f), "rb") as fh: +data_dir = "/home/share/guillaume/swaptions" +all_df = {} +fwd_index = [] +for f in os.listdir(data_dir): + print(f) + with open(os.path.join(data_dir, f), "rb") as fh: flag = False masterdf = {} for line in fh: line = line.decode('utf-8', 'ignore') line = line.rstrip() - m = re.search("(IG|HY)24 5y SWAPTION UPDATE - Ref\D+(.+)$", line) + m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line) if m: indextype = m.groups()[0] - if indextype=='HY': + series = int(m.groups()[1]) + if indextype == 'HY': refprice, refspread = map(float, - re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[1]).groups()) + re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups()) else: - refspread = float(m.groups()[1]) + refspread = float(m.groups()[2]) continue if line.startswith("At"): quotedate = pd.to_datetime(line[4:]) @@ -28,7 +31,7 @@ for f in os.listdir("."): m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) if m: date, fwprice, fwspread = m.groups() - date = pd.datetime.strptime(date, '%d%b%y') + date = pd.to_datetime(date, format='%d%b%y') continue if line.startswith("Stk"): flag = True @@ -47,23 +50,48 @@ for f in os.listdir("."): continue else: if indextype=='HY': - cols = ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol', + cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] else: - cols = ['Stk', 'Pay', 'Delta', 'Rec', 'Vol', + cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] df = pd.DataFrame.from_records(r, columns = cols) - df['refspread'] = refspread - if indextype=='HY': - df['refprice'] = refprice + df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) df.drop(['Pay', 'Rec'], axis=1, inplace=True) - df = df.convert_objects(convert_numeric=True) - df.set_index('Stk', inplace=True) - masterdf[date]=df + for col in df: + df[col] = pd.to_numeric(df[col], errors = 'coerce') + df.set_index('Strike', inplace=True) + d = {'quotedate': quotedate, + 'expiry': date, + 'indextype': indextype, + 'series': series, + 'ref': refspread if indextype =="IG" else refprice} + if indextype == "IG": + d['fwdspread'] = float(fwspread) + else: + d['fwdprice'] = float(fwprice) + fwd_index.append(d) + + masterdf[date] = df flag = False r = [] continue - masterdf = pd.concat(masterdf) - pdb.set_trace() + all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry']) +all_df = pd.concat(all_df, names = ['quotedate', 'indextype', 'series']) +all_df['DeltaPay'] = - all_df['DeltaPay']/100 +index_df = pd.DataFrame.from_records(fwd_index) +# with pd.HDFStore('../../data/swaptions_gs.hdf', mode = 'w', complevel=4, +# complib='blosc', fletcher32=True) as swaptions: +# swaptions.append('swaptions', all_df) +# swaptions.append('index_data', index_df) +all_df = all_df.rename(columns={'Strike':'strike', + 'Vol': 'vol', + 'PayOffer': 'pay_offer', + 'PayBid': 'pay_bid', + 'RecOffer': 'rec_offer' + 'RecBid': 'rec_bid', + 'Tail': 'tail', + 'DeltaPay': 'delta_pay'}) +}) |
