diff options
Diffstat (limited to 'python/parse_emails.py')
| -rw-r--r-- | python/parse_emails.py | 58 |
1 files changed, 42 insertions, 16 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index f9ad9d9c..79d54a81 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -4,22 +4,26 @@ from pathlib import Path import pdb from download_emails import update_emails import datetime +import sys def makedf(r, indextype): if indextype=='IG': - cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid', - 'PayOffer', 'DeltaPay', 'Vol', 'Gamma'] + cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid', + 'pay_offer', 'delta_pay', 'vol', 'gamma'] else: - cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid', - 'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma'] + cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid', + 'pay_offer', 'delta_pay', 'vol', 'price_vol', 'gamma'] df = pd.DataFrame.from_records(r, columns = cols) - for col in ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']: + for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma']: if col in df: df[col] = df[col].str.strip("%").astype('float')/100 for k in df: - if df.dtypes[k]=='object': - df[k] = pd.to_numeric(df[k]) - df.set_index('Strike', inplace=True) + if df.dtypes[k] == 'object': + try: + df[k] = pd.to_numeric(df[k]) + except ValueError: + pdb.set_trace() + df.set_index('strike', inplace=True) return df def parse_email(email_path): @@ -59,7 +63,7 @@ def parse_email(email_path): try: d = m.groupdict() d['quotedate'] = quotedate - d['indextype'] = indextype + d['index'] = indextype d['series'] = series d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y') except AttributeError: @@ -92,23 +96,45 @@ def parse_email(email_path): raise RuntimeError("empty email: {0}".format(email_path.name)) if __name__=="__main__": + import pickle update_emails() emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()] swaption_stack = {} index_data = pd.DataFrame() + with open(".pickle", "rb") as fh: + already_uploaded = pickle.load(fh) for f in emails: + if f.name in already_uploaded: + continue + else: + already_uploaded.add(f.name) try: key, option_stack, fwd_index = parse_email(f) except RuntimeError as e: print(e) else: - swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'Strike']) + swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike']) index_data = index_data.append(fwd_index) + if index_data.empty: + sys.exit() for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']: index_data[col] = index_data[col].astype('float') - index_data['indextype'] = index_data['indextype'].astype('category') - swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'indextype', 'series']) - with pd.HDFStore('../../data/swaptions.hdf', mode = 'w', complevel=4, - complib='blosc', fletcher32=True) as swaptions: - swaptions.append('swaptions', swaption_stack) - swaptions.append('index_data', index_data) + index_data['index'] = index_data['index'].astype('category') + + swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series']) + import feather + feather.write_dataframe(swaption_stack, '../../data/swaptions.fth') + feather.write_dataframe(index_data, '../../data/index_data.fth') + + swaption_stack = swaption_stack.drop_duplicates() + swaption_stack = swaption_stack.reset_index() + index_data = index_data.drop_duplicates() + from db import dbengine + serenitasdb = dbengine('serenitasdb') + from sqlalchemy import MetaData, Table + meta = MetaData(bind=serenitasdb) + swaption_quotes = Table('swaption_quotes', meta, autoload=True) + ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute() + index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append') + with open(".pickle", "wb") as fh: + pickle.dump(already_uploaded, fh) |
