"""Parse swaption quote emails stored under ``quotes/`` into one HDF table.

Every regular file in ``quotes/`` is read as one email: the first line is the
subject (index type and series), the remaining lines hold an ``At`` quote-date
line, ``Ref:`` header lines and ``Strike`` tables.  All tables are stacked
into a single DataFrame that is written to ``swaptions.hdf``.
"""
import re
from datetime import datetime
from pathlib import Path

import pandas as pd

_QUOTES_DIR = Path("quotes")

# Column layouts of the two table flavours; the 'HY' one carries an extra
# 'PxVol' column.
_IG_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec',
               'PayBid', 'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
_HY_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec',
               'PayBid', 'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']
# Columns quoted as percentages ("12.5%") that are stored as fractions.
_PERCENT_COLUMNS = ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']

# Raw strings: the original non-raw literals relied on invalid escape
# sequences such as "\w" and "\S".
_SUBJECT_RE = re.compile(r"(?:Fwd:)?(\w{2})(\S+)\s")
_REF_RE = re.compile(
    r"Ref:(\S+)\s+Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)")

_CUTOFF_DATE = pd.to_datetime('2015-04-01')


def makedf(r, indextype, ref):
    """Build a DataFrame indexed by strike from the rows of one quote table.

    Parameters
    ----------
    r : list of list of str
        One list of string fields per table row, in column order.
    indextype : str
        'IG' selects the 9-column layout; anything else selects the
        10-column layout that includes 'PxVol'.
    ref : str
        Reference level taken from the table's ``Ref:`` line; stored in a
        'ref' column on every row.

    Returns
    -------
    pandas.DataFrame
        Numeric frame indexed by 'Strike'.  Percentage columns are divided
        by 100.
    """
    cols = _IG_COLUMNS if indextype == 'IG' else _HY_COLUMNS
    df = pd.DataFrame.from_records(r, columns=cols)
    df['ref'] = ref
    for col in _PERCENT_COLUMNS:
        if col in df:
            df[col] = df[col].str.strip("%").astype('float') / 100
    # DataFrame.convert_objects(convert_numeric=True) no longer exists in
    # current pandas; coerce the remaining text columns explicitly.
    # Unparsable cells become NaN.
    for col in df.columns:
        if not pd.api.types.is_numeric_dtype(df[col]):
            df[col] = pd.to_numeric(df[col], errors='coerce')
    df.set_index('Strike', inplace=True)
    return df


def _parse_subject(subject):
    """Return ``(indextype, series)`` from a subject line, or None."""
    m = _SUBJECT_RE.match(subject)
    if not m:
        return None
    indextype, series = m.groups()
    try:
        return indextype, int(series)
    except ValueError:
        # The pattern matched but the series part is not an integer.
        return None


def _split_row(line):
    """Split one table row into fields; '/' and '|' count as separators."""
    line = re.sub("[/|]", " ", line)
    return re.sub(" +", " ", line).rstrip().split(" ")


def _parse_quote_lines(lines, source):
    """Parse the body lines of one email.

    Parameters
    ----------
    lines : iterable of bytes
        Remaining lines of the email after the subject line.
    source : pathlib.Path
        Used only in diagnostic messages.

    Returns
    -------
    tuple
        ``(quotedate, indextype, tables)`` where ``tables`` maps each expiry
        to the DataFrame built from its table.  ``quotedate`` and
        ``indextype`` are None when no ``At`` line / table header was seen.
    """
    quotedate = None
    indextype = None
    ref = None
    expiry = None
    in_table = False
    rows = []
    tables = {}

    def store_table():
        # Without a preceding Ref line there is no expiry to file the table
        # under (the original would have reused stale values or crashed).
        if expiry is None:
            print("no Ref line before quote table in {0}".format(source))
            return
        tables[expiry] = makedf(rows, indextype, ref)

    for raw in lines:
        line = raw.decode('utf-8', 'ignore').rstrip()
        if line.startswith("At"):
            quotedate = pd.to_datetime(line[4:])
            # NOTE(review): this only skips further handling of this one
            # line; it does not skip the email.  If the intent was to ignore
            # emails dated on/after the cutoff, that is not what happens --
            # confirm.  Kept as in the original.
            if quotedate >= _CUTOFF_DATE:
                continue
        if line.startswith("Ref"):
            m = _REF_RE.match(line)
            if m:
                # Groups 2 and 3 (forward spread and forward bpv) are unused.
                ref, _, _, expiry_text = m.groups()
                expiry = datetime.strptime(expiry_text, '%d-%b-%y')
            else:
                print("something wrong with {0}".format(source))
            continue
        if line.startswith("Strike"):
            # The table header decides the layout, overriding the subject.
            indextype = 'HY' if "Px Vol" in line else 'IG'
            in_table = True
            rows = []
            continue
        if not in_table:
            continue
        if line:
            rows.append(_split_row(line))
        else:
            # A blank line terminates the current table.
            store_table()
            in_table = False
            rows = []
    if in_table:
        # The last table ran up to end-of-file without a blank line.
        store_table()
    return quotedate, indextype, tables


def _parse_email(path):
    """Parse one email file.

    Returns ``((quotedate, indextype, series), DataFrame)`` or None when the
    subject cannot be parsed or the email holds no usable quote table.
    """
    with path.open("rb") as fh:
        # An empty file yields an empty subject instead of StopIteration.
        subject = next(fh, b"").decode('utf-8')
        parsed = _parse_subject(subject)
        if parsed is None:
            # Skip the file: carrying on would file its quotes under the
            # previous email's series (or fail on an undefined name).
            print("can't parse subject line for {0}".format(path))
            return None
        subject_indextype, series = parsed
        if subject_indextype == 'HY' and series == 24:
            print('{0}'.format(path))
        quotedate, indextype, tables = _parse_quote_lines(fh, path)
    if not tables or quotedate is None:
        # pd.concat raises on an empty mapping, and a missing quote date
        # leaves nothing to key the tables by.
        print("no quotes found in {0}".format(path))
        return None
    quotes = pd.concat(tables, names=['expiry', 'Strike'])
    return (quotedate, indextype, series), quotes


if _QUOTES_DIR.is_dir():
    # Sorted so that duplicate keys resolve the same way on every run.
    emails = sorted(f for f in _QUOTES_DIR.iterdir() if f.is_file())
else:
    print("quotes directory not found: {0}".format(_QUOTES_DIR))
    emails = []

masterdf = {}
for f in emails:
    result = _parse_email(f)
    if result is None:
        continue
    key, quotes = result
    masterdf[key] = quotes

if masterdf:
    # Level names follow the key order (quotedate, indextype, series); the
    # original listed them in a different order and so mislabelled levels.
    masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
    masterdf.to_hdf('swaptions.hdf', key='swaptions')
else:
    print("no swaption quotes parsed; swaptions.hdf not written")