diff options
| -rw-r--r-- | python/parse_emails.py | 34 |
1 files changed, 25 insertions, 9 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index aed45225..b49b8bdc 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -1,6 +1,7 @@ import pandas as pd import re from pathlib import Path +import pdb emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()] @@ -16,7 +17,9 @@ def makedf(r, indextype, ref): for col in ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']: if col in df: df[col] = df[col].str.strip("%").astype('float')/100 - df = df.convert_objects(convert_numeric=True) + for k in df: + if df.dtypes[k]=='object': + df[k] = pd.to_numeric(df[k]) df.set_index('Strike', inplace=True) return df @@ -38,16 +41,28 @@ for f in emails: line = line.decode('utf-8', 'ignore') line = line.rstrip() if line.startswith("At"): - quotedate = pd.to_datetime(line[4:]) - if quotedate >= pd.to_datetime('2015-04-01'): - continue + for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']: + try: + quotedate = pd.to_datetime(line, format=p, exact=False) + except ValueError: + continue + else: + if quotedate.year == 1900: + quotedate = quotedate.replace(year=2015) + break + else: + pdb.set_trace() if line.startswith("Ref"): - m = re.match("Ref:(\S+)\s+Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)", line) + m = re.match("Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)", + line) if m: - ref, fwspread, fwbpv, expiry = m.groups() - expiry = pd.datetime.strptime(expiry, '%d-%b-%y') + if len(m.groups())==4: + ref, fwspread, fwfwbpv, expiry = m.groups() + elif len(m.groups())==5: + ref, fwprice, fwspread, fwfwbpv, expiry = m.groups() else: print("something wrong with {0}".format(f)) + expiry = pd.datetime.strptime(expiry, '%d-%b-%y') continue if line.startswith("Strike"): if "Px Vol" in line: @@ -70,6 +85,7 @@ for f in emails: continue if flag: allexpiriesdf[expiry] = makedf(r, indextype, ref) - masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'Strike']) -masterdf = pd.concat(masterdf, names=['indextype', 'series', 'quotedate']) + if allexpiriesdf: + masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'Strike']) +masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series']) masterdf.to_hdf('swaptions.hdf', key='swaptions') |
