"""Parse swaption quote emails and store the quotes in an HDF5 file."""
import datetime
import pdb
import re
from pathlib import Path

import pandas as pd

from download_emails import update_emails

# Subject line: up to two "Fwd:" prefixes, an optional "BAML " tag, then a
# two-character index type immediately followed by a 1-2 digit series number.
_SUBJECT_RE = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")

# Header line that precedes each strike table.  "Fwd Px" is optional, so
# `fwdprice` is None in the resulting dict when it is absent.
_REF_RE = re.compile(
    r"Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?"
    r"Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)"
    r"\s+Expiry:(?P<expiry>\S+)"
)

# Formats tried, in order, on the "At ..." line.  The first one carries no
# year, in which case the parsed timestamp comes back with year 1900.
_QUOTEDATE_FORMATS = ('%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S')

_IG_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec',
               'PayBid', 'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
_DEFAULT_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec',
                    'PayBid', 'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']
_PERCENT_COLUMNS = ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']


def makedf(r, indextype, ref):
    """Build a numeric quote table indexed by strike.

    Parameters
    ----------
    r : list of rows, each row a list of string tokens in column order.
    indextype : index type string; 'IG' tables have no 'PxVol' column, any
        other value selects the layout that includes it.
    ref : reference level, stored unchanged in every row under 'ref' and then
        converted to a number like the other columns.

    Returns
    -------
    pandas.DataFrame indexed by 'Strike'.
    """
    cols = _IG_COLUMNS if indextype == 'IG' else _DEFAULT_COLUMNS
    df = pd.DataFrame.from_records(r, columns=cols)
    df['ref'] = ref
    # These columns are quoted with a trailing "%"; store them as fractions.
    for col in _PERCENT_COLUMNS:
        if col in df:
            df[col] = df[col].str.strip("%").astype('float') / 100
    # Everything still held as text is converted to numbers.  Testing for
    # "not numeric" rather than dtype == object also covers pandas' dedicated
    # string dtype.
    for col in df:
        if not pd.api.types.is_numeric_dtype(df[col]):
            df[col] = pd.to_numeric(df[col])
    return df.set_index('Strike')


def _parse_quotedate(line, date_received):
    """Return the timestamp found in an "At ..." line.

    Raises RuntimeError when none of the known formats matches.
    """
    for fmt in _QUOTEDATE_FORMATS:
        try:
            quotedate = pd.to_datetime(line, format=fmt, exact=False)
        except ValueError:
            continue
        if quotedate.year == 1900:
            # The format had no year: borrow it from the reception date.
            quotedate = quotedate.replace(year=date_received.year)
        return quotedate
    raise RuntimeError("can't parse date")


def parse_email(email_path):
    """Parse one saved swaption email.

    The file is read as bytes: the first line is the reception time as an
    integer number of milliseconds, the second line is the subject, and the
    remaining lines are the body.

    Parameters
    ----------
    email_path : pathlib.Path of the saved email.

    Returns
    -------
    (quotedate, indextype, series) : key identifying the quote run, using the
        last quote date seen in the body.
    option_stack : dict mapping expiry (Timestamp) to the table from `makedf`.
    fwd_index : DataFrame of the "Ref" header fields, indexed by 'quotedate'.

    Raises
    ------
    RuntimeError
        If the subject or an "At" line cannot be parsed, if a "Ref" header
        appears before any quote date, or if no table was found.
    """
    with email_path.open("rb") as fh:
        date_received = datetime.datetime.fromtimestamp(
            int(fh.readline()) / 1000)
        subject = fh.readline().decode('utf-8')
        m = _SUBJECT_RE.match(subject)
        if m is None:
            raise RuntimeError(
                "can't parse subject line: {0} for email {1}".format(
                    subject, email_path.name))
        indextype, series = m.groups()
        series = int(series)

        flag = False        # True while reading the rows of a strike table
        r = []              # rows of the table being read
        quotedate = None    # last timestamp parsed from an "At" line
        d = None            # header of the current table, None if unusable
        option_stack = {}
        fwd_index = []
        for raw in fh:
            line = raw.decode('utf-8', 'ignore').rstrip()
            if line.startswith("At"):
                quotedate = _parse_quotedate(line, date_received)
            if line.startswith("Ref"):
                # Forget the previous header first, so that a table following
                # a malformed header is never filed under the wrong expiry.
                d = None
                m = _REF_RE.match(line)
                if m is None:
                    print("something wrong with {0}".format(email_path.name))
                    continue
                if quotedate is None:
                    raise RuntimeError(
                        "no quote date before Ref line in email {0}".format(
                            email_path.name))
                header = m.groupdict()
                try:
                    header['expiry'] = pd.to_datetime(header['expiry'],
                                                      format='%d-%b-%y')
                except ValueError:
                    print("something wrong with {0}".format(email_path.name))
                    continue
                header['quotedate'] = quotedate
                header['index'] = indextype
                header['series'] = series
                d = header
                continue
            if line.startswith("Strike"):
                # Column header line: the rows of the table follow.
                flag = True
                r = []
                continue
            if flag:
                if line:
                    # "/" and "|" separate bid/offer pairs; treat as blanks.
                    line = re.sub("[/|]", " ", line)
                    vals = re.sub(" +", " ", line).rstrip().split(" ")
                    r.append(vals)
                else:
                    # A blank line ends the table.
                    if d is not None:
                        option_stack[d['expiry']] = makedf(
                            r, indextype, d['ref'])
                        fwd_index.append(d)
                    flag = False
                    r = []
                continue
        # The file may end without a blank line after the last table.
        if flag and d is not None:
            option_stack[d['expiry']] = makedf(r, indextype, d['ref'])
            fwd_index.append(d)

    if not option_stack:
        raise RuntimeError("empty email: {0}".format(email_path.name))
    fwd_index = pd.DataFrame.from_records(fwd_index, index='quotedate')
    return (quotedate, indextype, series), option_stack, fwd_index


def main():
    """Refresh the emails, parse every file and write 'swaptions.hdf'."""
    update_emails()
    emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
    swaption_stack = {}
    fwd_frames = []
    for f in emails:
        try:
            key, option_stack, fwd_index = parse_email(f)
        except RuntimeError as e:
            print(e)
        else:
            swaption_stack[key] = pd.concat(option_stack,
                                            names=['expiry', 'strike'])
            fwd_frames.append(fwd_index)
    if not swaption_stack:
        # Nothing to store; pd.concat would fail on an empty collection.
        raise SystemExit("no swaption emails could be parsed")

    # DataFrame.append no longer exists in pandas 2; concatenate once instead.
    index_data = pd.concat(fwd_frames)
    for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
        index_data[col] = index_data[col].astype('float')
    index_data['index'] = index_data['index'].astype('category')

    swaption_stack = pd.concat(swaption_stack,
                               names=['quotedate', 'indextype', 'series'])
    with pd.HDFStore('swaptions.hdf', mode='w', complevel=4,
                     complib='blosc', fletcher32=True) as swaptions:
        swaptions.append('swaptions', swaption_stack)
        swaptions.append('index_data', index_data)


if __name__ == "__main__":
    main()