about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/parse_emails.py 66
1 files changed, 38 insertions, 28 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 65cb7810..65a8b091 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -27,7 +27,7 @@ def parse_email(email_path):
with email_path.open("rb") as fh:
date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000)
subject = fh.readline()
- m = re.match("(?:Fwd:)?(?:BAML )?(\w{2})([0-9]{1,2})\s", subject.decode('utf-8'))
+ m = re.match("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s", subject.decode('utf-8'))
if m:
indextype, series = m.groups()
series = int(series)
@@ -35,7 +35,8 @@ def parse_email(email_path):
raise RuntimeError("can't parse subject line: {0} for email {1}".format(
subject.decode("utf-8"), email_path.name))
flag = False
- allexpiriesdf = {}
+ option_stack = {}
+ fwd_index = []
for line in fh:
line = line.decode('utf-8', 'ignore')
line = line.rstrip()
@@ -52,22 +53,20 @@ def parse_email(email_path):
else:
raise RuntimeError("can't parse date")
if line.startswith("Ref"):
- m = re.match("Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)",
- line)
- if m:
- if len(m.groups())==4:
- ref, fwspread, fwfwbpv, expiry = m.groups()
- elif len(m.groups())==5:
- ref, fwprice, fwspread, fwfwbpv, expiry = m.groups()
- else:
- print("something wrong with {0}".format(f))
- expiry = pd.datetime.strptime(expiry, '%d-%b-%y')
+ regex = "Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?" \
+ "Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)" \
+ "\s+Expiry:(?P<expiry>\S+)"
+ m = re.match(regex, line)
+ try:
+ d = m.groupdict()
+ d['quotedate'] = quotedate
+ d['index'] = indextype
+ d['series'] = series
+ d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y')
+ except AttributeError:
+ print("something wrong with {0}".format(email_path.name))
continue
if line.startswith("Strike"):
- if "Px Vol" in line:
- indextype='HY'
- else:
- indextype='IG'
flag = True
r = []
continue
@@ -78,28 +77,39 @@ def parse_email(email_path):
r.append(vals)
continue
else:
- allexpiriesdf[expiry] = makedf(r, indextype, ref)
+ option_stack[d['expiry']] = makedf(r, indextype, d['ref'])
+ fwd_index.append(d)
flag = False
r = []
continue
if flag:
- allexpiriesdf[expiry] = makedf(r, indextype, ref)
- if allexpiriesdf:
- return (quotedate, indextype, series), allexpiriesdf
+ option_stack[d['expiry']] = makedf(r, indextype, d['ref'])
+ fwd_index.append(d)
+ if option_stack:
+ fwd_index = pd.DataFrame.from_records(fwd_index,
+ index='quotedate')
+ return (quotedate, indextype, series), option_stack, fwd_index
else:
- raise RuntimeError("empty email")
+ raise RuntimeError("empty email: {0}".format(email_path.name))
if __name__=="__main__":
- #update_emails()
+ update_emails()
emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
- masterdf = {}
+ swaption_stack = {}
+ index_data = pd.DataFrame()
for f in emails:
try:
- key, allexpiriesdf = parse_email(f)
+ key, option_stack, fwd_index = parse_email(f)
except RuntimeError as e:
print(e)
- print(f.name)
else:
- masterdf[key] = pd.concat(allexpiriesdf, names=['expiry', 'strike'])
- masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
- masterdf.to_hdf('swaptions.hdf', key='swaptions')
+ swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike'])
+ index_data = index_data.append(fwd_index)
+ for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
+ index_data[col] = index_data[col].astype('float')
+ index_data['index'] = index_data['index'].astype('category')
+ swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'indextype', 'series'])
+ with pd.HDFStore('swaptions.hdf', mode = 'w', complevel=4,
+ complib='blosc', fletcher32=True) as swaptions:
+ swaptions.append('swaptions', swaption_stack)
+ swaptions.append('index_data', index_data)