"""Parse swaption quote files into one multi-indexed table.

Every regular file in ``DATA_DIR`` is read as bytes. The first line is treated
as a subject line giving a two-character index type and a series number. The
remaining lines are scanned for:

* ``At ...``     -- the quote timestamp,
* ``Ref:...``    -- the reference level and expiry of the table that follows,
* ``Strike ...`` -- the header of a quote table, terminated by a blank line
  or by the end of the file.

All tables are concatenated and written to ``swaptions.hdf`` under the key
``swaptions``.
"""
import pdb  # retained file-level import; no longer called by the batch path
import re
from datetime import datetime
from pathlib import Path

import pandas as pd

DATA_DIR = Path("../../data/swaptions")

# Timestamps written without a year parse as 1900; they are moved to this year
# (the value the script has always hard-coded).
DEFAULT_QUOTE_YEAR = 2015

_QUOTEDATE_FORMATS = ('%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S')
_EXPIRY_FORMAT = '%d-%b-%y'

# Columns whose cells look like "12.5%" and are stored as fractions.
_PERCENT_COLS = ('DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma')

# Raw strings keep \w, \s and \S from being read as (invalid) string escapes.
_SUBJECT_RE = re.compile(r"(?:Fwd:)?(\w{2})([0-9]{1,2})\s")
_REF_RE = re.compile(
    r"Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+"
    r"Fwd Bpv:(\S+)\s+Expiry:(\S+)"
)
_SEPARATORS_RE = re.compile(r"[/|]")
_SPACES_RE = re.compile(r" +")


def makedf(r, indextype, ref):
    """Build a numeric DataFrame indexed by ``Strike`` from raw table rows.

    Parameters
    ----------
    r : list of list of str
        One list of cell strings per table row: 9 cells when ``indextype`` is
        ``'IG'``, 10 cells (with an extra ``PxVol`` column) otherwise.
    indextype : str
        ``'IG'`` selects the 9-column layout; any other value selects the
        10-column layout.
    ref : str
        Reference value stored in a ``ref`` column on every row and converted
        to a number together with the other columns.

    Returns
    -------
    pandas.DataFrame
        All columns numeric; percentage columns divided by 100.

    Raises
    ------
    ValueError
        If a row does not have the expected number of cells or a cell cannot
        be converted to a number.
    """
    if indextype == 'IG':
        cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
                'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
    else:
        cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
                'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']
    df = pd.DataFrame.from_records(r, columns=cols)
    df['ref'] = ref
    for col in _PERCENT_COLS:
        if col in df:
            df[col] = df[col].str.strip("%").astype('float') / 100
    for k in df:
        # Test for "not numeric yet" instead of dtype == 'object' so that
        # string-dtype columns (newer pandas) are converted as well.
        if not pd.api.types.is_numeric_dtype(df[k]):
            df[k] = pd.to_numeric(df[k])
    df.set_index('Strike', inplace=True)
    return df


def _parse_quotedate(line):
    """Return the timestamp found in an ``At ...`` line, or None if no format fits."""
    for fmt in _QUOTEDATE_FORMATS:
        try:
            quotedate = pd.to_datetime(line, format=fmt, exact=False)
        except ValueError:
            continue
        if quotedate.year == 1900:
            quotedate = quotedate.replace(year=DEFAULT_QUOTE_YEAR)
        return quotedate
    return None


def _parse_ref(line):
    """Return ``(ref, expiry)`` from a ``Ref:...`` line, or None if it does not match."""
    m = _REF_RE.match(line)
    if m is None:
        return None
    # The pattern always yields five groups (the optional "Fwd Px" group is
    # None when absent); only the first and the last one are used.
    ref, expiry = m.group(1, 5)
    return ref, datetime.strptime(expiry, _EXPIRY_FORMAT)


def _store_table(tables, rows, indextype, ref, expiry, path):
    """Convert ``rows`` with ``makedf`` and file the result in ``tables`` under ``expiry``."""
    if expiry is None:
        # No usable Ref line preceded this table, so there is no key for it.
        print("skipping table without a valid Ref line in {0}".format(path))
        return
    tables[expiry] = makedf(rows, indextype, ref)


def _parse_email(path):
    """Parse one file; return ``((quotedate, indextype, series), frame)`` or None."""
    with path.open("rb") as fh:
        subject = fh.readline().decode('utf-8')
        m = _SUBJECT_RE.match(subject)
        if m is None:
            print("can't parse subject line for {0}".format(path))
            print(subject)
            return None
        indextype, series = m.groups()
        series = int(series)

        # All parsing state is local to this file so nothing carries over
        # from a previously parsed one.
        quotedate = ref = expiry = None
        in_table = False
        rows = []
        allexpiriesdf = {}
        for raw in fh:
            line = raw.decode('utf-8', 'ignore').rstrip()
            if line.startswith("At"):
                parsed = _parse_quotedate(line)
                if parsed is None:
                    print("can't parse quote date in {0}: {1}".format(path, line))
                else:
                    quotedate = parsed
                # Deliberately no ``continue``: the line still goes through
                # the checks below, as it always did.
            if line.startswith("Ref"):
                parsed = _parse_ref(line)
                if parsed is None:
                    print("something wrong with {0}".format(path))
                    ref = expiry = None
                else:
                    ref, expiry = parsed
                continue
            if line.startswith("Strike"):
                # The table header decides the layout, overriding the subject.
                indextype = 'HY' if "Px Vol" in line else 'IG'
                in_table = True
                rows = []
                continue
            if in_table:
                if line:
                    line = _SEPARATORS_RE.sub(" ", line)
                    rows.append(_SPACES_RE.sub(" ", line).rstrip().split(" "))
                else:
                    # A blank line ends the current table.
                    _store_table(allexpiriesdf, rows, indextype, ref, expiry, path)
                    in_table = False
                    rows = []
        if in_table:
            # The file ended while a table was still open.
            _store_table(allexpiriesdf, rows, indextype, ref, expiry, path)

    if not allexpiriesdf:
        return None
    if quotedate is None:
        print("no quote date found in {0}".format(path))
        return None
    frame = pd.concat(allexpiriesdf, names=['expiry', 'Strike'])
    return (quotedate, indextype, series), frame


if DATA_DIR.is_dir():
    emails = [f for f in DATA_DIR.iterdir() if f.is_file()]
else:
    print("data directory {0} not found".format(DATA_DIR))
    emails = []

masterdf = {}
for f in emails:
    parsed_email = _parse_email(f)
    if parsed_email is not None:
        key, frame = parsed_email
        masterdf[key] = frame

if masterdf:
    masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
    masterdf.to_hdf('swaptions.hdf', key='swaptions')
else:
    # pd.concat raises on an empty mapping; report instead of crashing.
    print("no swaption quotes parsed; swaptions.hdf not written")