"""Parse swaption quote emails into pandas DataFrames.

Each email file is expected to hold, in order: a first line with the
reception time as an integer number of milliseconds since the epoch, a
second line with the subject, and then the message body.  The body is
scanned for ``At ...`` (quote time), ``Ref:...`` (reference level and
expiry) and ``Strike ...`` (table header) lines; the rows following a table
header are collected until a blank line or the end of the file.

Run as a script, every file in ``../../data/swaptions`` is parsed and the
combined result is written to ``swaptions.hdf``.
"""
import datetime
import pdb  # noqa: F401 -- unused here, kept because other code may rely on it
import re
from pathlib import Path

import pandas as pd

try:
    from download_emails import update_emails
except ImportError:
    # The downloader is only needed for the (currently disabled) refresh step
    # in main(); the parsing helpers stay importable without it.
    update_emails = None

# Column layouts of the quote tables.  Tables whose header mentions "Px Vol"
# carry one extra column.
_IG_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
               'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
_HY_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
               'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']
# Columns quoted as percentages ("45%"); they are stored as fractions (0.45).
_PERCENT_COLUMNS = ('DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma')

_SUBJECT_RE = re.compile(r"(?:Fwd:)?(?:BAML )?(\w{2})([0-9]{1,2})\s")
_REF_RE = re.compile(
    r"Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+"
    r"Fwd Bpv:(\S+)\s+Expiry:(\S+)")
_SEPARATOR_RE = re.compile(r"[/|]")
# Formats tried, in order, on "At ..." lines.  The first one has no year.
_QUOTEDATE_FORMATS = ('%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S')


def makedf(r, indextype, ref):
    """Build the quote table for a single expiry.

    Parameters
    ----------
    r : list of list of str
        Table rows, one list of fields per strike, in column order.
    indextype : str
        ``'IG'`` selects the 9-column layout; any other value selects the
        10-column layout that includes ``PxVol``.
    ref : str or number
        Reference level; stored in a constant ``ref`` column.

    Returns
    -------
    pandas.DataFrame
        All-numeric frame indexed by ``Strike``.  Percentage columns are
        divided by 100.
    """
    cols = _IG_COLUMNS if indextype == 'IG' else _HY_COLUMNS
    df = pd.DataFrame.from_records(r, columns=cols)
    df['ref'] = ref
    for col in _PERCENT_COLUMNS:
        if col in df:
            df[col] = df[col].str.strip("%").astype('float') / 100
    for k in df:
        # Convert every column that is still text, whatever dtype pandas
        # inferred for it (plain ``object`` or a dedicated string dtype).
        if not pd.api.types.is_numeric_dtype(df[k]):
            df[k] = pd.to_numeric(df[k])
    df.set_index('Strike', inplace=True)
    return df


def _parse_quotedate(line, default_year):
    """Return the timestamp found in an ``At ...`` line.

    ``default_year`` replaces the year when the matched format carries none
    (pandas then reports 1900).  Raises ``RuntimeError`` if no known format
    matches.
    """
    for fmt in _QUOTEDATE_FORMATS:
        try:
            # exact=False lets the format match anywhere in the line, so the
            # leading "At" does not need to be stripped first.
            quotedate = pd.to_datetime(line, format=fmt, exact=False)
        except ValueError:
            continue
        if quotedate.year == 1900:
            quotedate = quotedate.replace(year=default_year)
        return quotedate
    raise RuntimeError("can't parse date")


def parse_email(email_path):
    """Parse one quote email.

    Parameters
    ----------
    email_path : pathlib.Path
        File to read.  Only ``open("rb")`` and ``name`` are used.

    Returns
    -------
    tuple
        ``((quotedate, indextype, series), tables)`` where ``tables`` maps
        each expiry (``datetime.datetime``) to the frame built by
        :func:`makedf`.  ``indextype`` is ``'HY'`` when the last table header
        seen contains ``"Px Vol"`` and ``'IG'`` otherwise; ``series`` is the
        integer taken from the subject line.

    Raises
    ------
    RuntimeError
        If the subject line or an ``At`` line cannot be parsed, if a table
        appears before any parsable ``Ref`` line, if no table is found, or if
        no quote date is found.
    """
    with email_path.open("rb") as fh:
        # First line: reception time in milliseconds since the epoch.
        date_received = datetime.datetime.fromtimestamp(
            int(fh.readline()) / 1000)
        subject = fh.readline().decode('utf-8')
        m = _SUBJECT_RE.match(subject)
        if not m:
            raise RuntimeError(
                "can't parse subject line: {0} for email {1}".format(
                    subject, email_path.name))
        indextype = m.group(1)
        series = int(m.group(2))

        quotedate = None
        ref = None
        expiry = None
        in_table = False
        rows = []
        allexpiriesdf = {}

        def store_table():
            # A table can only be filed once a Ref line supplied its expiry.
            if expiry is None:
                raise RuntimeError(
                    "quote table without a Ref line in email {0}".format(
                        email_path.name))
            allexpiriesdf[expiry] = makedf(rows, indextype, ref)

        for raw in fh:
            line = raw.decode('utf-8', 'ignore').rstrip()
            if line.startswith("At"):
                quotedate = _parse_quotedate(line, date_received.year)
                continue
            if line.startswith("Ref"):
                m = _REF_RE.match(line)
                if m:
                    # The pattern always yields five groups; the forward
                    # price, forward spread and forward bpv are not used.
                    ref, _fwprice, _fwspread, _fwbpv, expiry = m.groups()
                    expiry = datetime.datetime.strptime(expiry, '%d-%b-%y')
                else:
                    print("something wrong with {0}".format(email_path.name))
                continue
            if line.startswith("Strike"):
                indextype = 'HY' if "Px Vol" in line else 'IG'
                in_table = True
                rows = []
                continue
            if in_table:
                if line:
                    # "bid/offer" pairs and "|" column separators become
                    # whitespace; split() also copes with leading blanks.
                    vals = _SEPARATOR_RE.sub(" ", line).split()
                    if vals:
                        rows.append(vals)
                else:
                    # A blank line closes the current table.
                    store_table()
                    in_table = False
                    rows = []
        if in_table:
            # The file ended while a table was still open.
            store_table()

    if not allexpiriesdf:
        raise RuntimeError("empty email")
    if quotedate is None:
        raise RuntimeError("can't find quote date in email {0}".format(
            email_path.name))
    return (quotedate, indextype, series), allexpiriesdf


def main():
    """Parse every email in ``../../data/swaptions`` into ``swaptions.hdf``."""
    #update_emails()
    emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
    parsed = {}
    for f in emails:
        try:
            key, allexpiriesdf = parse_email(f)
        except RuntimeError as e:
            print(e)
            print(f.name)
        else:
            parsed[key] = pd.concat(allexpiriesdf, names=['expiry', 'strike'])
    if not parsed:
        # pd.concat raises on an empty mapping; there is nothing to write.
        print("no email could be parsed")
        return
    masterdf = pd.concat(parsed, names=['quotedate', 'indextype', 'series'])
    masterdf.to_hdf('swaptions.hdf', key='swaptions')


if __name__ == "__main__":
    main()