import datetime
import pdb
import re
import sys
from pathlib import Path

import pandas as pd

from download_emails import update_emails


def makedf(r, indextype):
    """Build a strike-indexed DataFrame from the rows parsed out of one expiry block."""
    if indextype == 'IG':
        cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec',
                'pay_bid', 'pay_offer', 'delta_pay', 'vol', 'gamma']
    else:
        cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec',
                'pay_bid', 'pay_offer', 'delta_pay', 'vol', 'price_vol', 'gamma']
    df = pd.DataFrame.from_records(r, columns=cols)
    # Percentage columns arrive as strings like "12.5%"; convert them to fractions.
    for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma']:
        if col in df:
            df[col] = df[col].str.strip("%").astype('float') / 100
    # Coerce any remaining string columns to numeric; drop into the debugger on failure.
    for k in df:
        if df.dtypes[k] == 'object':
            try:
                df[k] = pd.to_numeric(df[k])
            except ValueError:
                pdb.set_trace()
    df.set_index('strike', inplace=True)
    return df


def parse_email(email_path):
    """Parse one saved e-mail into ((quotedate, index, series), option_stack, fwd_index)."""
    with email_path.open("rb") as fh:
        # First line is the received timestamp in milliseconds, second line is the subject.
        date_received = datetime.datetime.fromtimestamp(int(fh.readline()) / 1000)
        subject = fh.readline()
        m = re.match(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s",
                     subject.decode('utf-8'))
        if m:
            indextype, series = m.groups()
            series = int(series)
        else:
            raise RuntimeError("can't parse subject line: {0} for email {1}".format(
                subject.decode("utf-8"), email_path.name))
        flag = False
        option_stack = {}
        fwd_index = []
        for line in fh:
            line = line.decode('utf-8', 'ignore')
            line = line.rstrip()
            if line.startswith("At"):
                # Quote timestamp; try both known date formats.
                for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']:
                    try:
                        quotedate = pd.to_datetime(line, format=p, exact=False)
                    except ValueError:
                        continue
                    else:
                        if quotedate.year == 1900:
                            # Format without a year: fall back to the year the e-mail arrived.
                            quotedate = quotedate.replace(year=date_received.year)
                        break
                else:
                    raise RuntimeError("can't parse date")
            if line.startswith("Ref"):
                regex = r"Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?" \
                        r"Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)" \
                        r"\s+Expiry:(?P<expiry>\S+)"
                m = re.match(regex, line)
                try:
                    d = m.groupdict()
                    d['quotedate'] = quotedate
                    d['index'] = indextype
                    d['series'] = series
                    d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y')
                except AttributeError:
                    print("something wrong with {0}".format(email_path.name))
                continue
            if line.startswith("Strike"):
                # Header row of the quote table: start collecting rows.
                flag = True
                r = []
                continue
            if flag:
                if line:
                    line = re.sub("[/|]", " ", line)
                    vals = re.sub(" +", " ", line).rstrip().split(" ")
                    r.append(vals)
                    continue
                else:
                    # Blank line ends the table for this expiry.
                    option_stack[d['expiry']] = makedf(r, indextype)
                    fwd_index.append(d)
                    flag = False
                    r = []
                    continue
        if flag:
            # File ended while still inside a quote table.
            option_stack[d['expiry']] = makedf(r, indextype)
            fwd_index.append(d)
    if option_stack:
        fwd_index = pd.DataFrame.from_records(fwd_index, index='quotedate')
        return (quotedate, indextype, series), option_stack, fwd_index
    else:
        raise RuntimeError("empty email: {0}".format(email_path.name))


if __name__ == "__main__":
    import pickle
    update_emails()
    emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
    swaption_stack = {}
    index_data = pd.DataFrame()
    with open(".pickle", "rb") as fh:
        already_uploaded = pickle.load(fh)
    for f in emails:
        if f.name in already_uploaded:
            continue
        else:
            already_uploaded.add(f.name)
        try:
            key, option_stack, fwd_index = parse_email(f)
        except RuntimeError as e:
            print(e)
        else:
            swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike'])
            index_data = index_data.append(fwd_index)
    if index_data.empty:
        sys.exit()
    for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
        index_data[col] = index_data[col].astype('float')
    index_data['index'] = index_data['index'].astype('category')
    swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series'])
    import feather
    feather.write_dataframe(swaption_stack, '../../data/swaptions.fth')
    feather.write_dataframe(index_data, '../../data/index_data.fth')
    swaption_stack = swaption_stack.drop_duplicates()
    swaption_stack = swaption_stack.reset_index()
    index_data = index_data.drop_duplicates()
    from db import dbengine
    serenitasdb = dbengine('serenitasdb')
    from sqlalchemy import MetaData, Table
    meta = MetaData(bind=serenitasdb)
    swaption_quotes = Table('swaption_quotes', meta, autoload=True)
    ins = swaption_quotes.insert().values(
        swaption_stack.to_dict(orient='records')).execute()
    index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append')
    with open(".pickle", "wb") as fh:
        pickle.dump(already_uploaded, fh)