about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/parse_emails.py 34
1 files changed, 25 insertions, 9 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index aed45225..b49b8bdc 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -1,6 +1,7 @@
import pandas as pd
import re
from pathlib import Path
+import pdb
emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
@@ -16,7 +17,9 @@ def makedf(r, indextype, ref):
for col in ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']:
if col in df:
df[col] = df[col].str.strip("%").astype('float')/100
- df = df.convert_objects(convert_numeric=True)
+ for k in df:
+ if df.dtypes[k]=='object':
+ df[k] = pd.to_numeric(df[k])
df.set_index('Strike', inplace=True)
return df
@@ -38,16 +41,28 @@ for f in emails:
line = line.decode('utf-8', 'ignore')
line = line.rstrip()
if line.startswith("At"):
- quotedate = pd.to_datetime(line[4:])
- if quotedate >= pd.to_datetime('2015-04-01'):
- continue
+ for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']:
+ try:
+ quotedate = pd.to_datetime(line, format=p, exact=False)
+ except ValueError:
+ continue
+ else:
+ if quotedate.year == 1900:
+ quotedate = quotedate.replace(year=2015)
+ break
+ else:
+ pdb.set_trace()
if line.startswith("Ref"):
- m = re.match("Ref:(\S+)\s+Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)", line)
+ m = re.match("Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)",
+ line)
if m:
- ref, fwspread, fwbpv, expiry = m.groups()
- expiry = pd.datetime.strptime(expiry, '%d-%b-%y')
+ if len(m.groups())==4:
+ ref, fwspread, fwfwbpv, expiry = m.groups()
+ elif len(m.groups())==5:
+ ref, fwprice, fwspread, fwfwbpv, expiry = m.groups()
else:
print("something wrong with {0}".format(f))
+ expiry = pd.datetime.strptime(expiry, '%d-%b-%y')
continue
if line.startswith("Strike"):
if "Px Vol" in line:
@@ -70,6 +85,7 @@ for f in emails:
continue
if flag:
allexpiriesdf[expiry] = makedf(r, indextype, ref)
- masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'Strike'])
-masterdf = pd.concat(masterdf, names=['indextype', 'series', 'quotedate'])
+ if allexpiriesdf:
+ masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'Strike'])
+masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
masterdf.to_hdf('swaptions.hdf', key='swaptions')