1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
import pandas as pd
import re
from pathlib import Path
import pdb
from download_emails import update_emails
import datetime
def makedf(r, indextype, ref):
    """Build a strike-indexed, fully numeric quote table from raw grid rows.

    Args:
        r: Sequence of rows, each a sequence of string cells in the column
            order ``Strike, RecBid, RecOffer, DeltaRec, PayBid, PayOffer,
            DeltaPay, Vol, [PxVol,] Gamma``.
        indextype: Index code of the quoted grid.  ``'IG'`` grids have nine
            columns; any other value is given the additional ``PxVol``
            column (ten columns in total).
        ref: Reference level to store on every row in a ``ref`` column.

    Returns:
        pandas.DataFrame indexed by ``Strike``.  ``DeltaRec``, ``DeltaPay``,
        ``Vol``, ``PxVol`` and ``Gamma`` have any ``%`` characters stripped
        and are divided by 100; every other column is converted with
        ``pd.to_numeric``.

    Raises:
        ValueError: If a cell cannot be converted to a number.  A row whose
            length does not match the expected column count is also rejected
            (by ``DataFrame.from_records``).
    """
    cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
            'PayOffer', 'DeltaPay', 'Vol']
    if indextype != 'IG':
        cols.append('PxVol')
    cols.append('Gamma')

    df = pd.DataFrame.from_records(r, columns=cols)
    df['ref'] = ref

    # Percentage-quoted columns: drop the "%" sign and rescale to fractions.
    for col in ('DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma'):
        if col in df:
            df[col] = df[col].str.strip("%").astype('float') / 100

    # Convert whatever is still textual.  Testing for "not numeric" rather
    # than for dtype == 'object' keeps this working when pandas infers a
    # dedicated string dtype for the raw cells instead of object.
    for col in list(df.columns):
        if not pd.api.types.is_numeric_dtype(df[col]):
            df[col] = pd.to_numeric(df[col])

    df.set_index('Strike', inplace=True)
    return df
# Subject line: up to two "Fwd:" markers and an optional "BAML " tag, then a
# two-character index code followed by a one- or two-digit series number.
_SUBJECT_RE = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")

# Header line preceding each strike grid; "Fwd Px" is optional.
_REF_RE = re.compile(
    r"Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?"
    r"Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)"
    r"\s+Expiry:(?P<expiry>\S+)")

# Timestamp layouts tried, in order, on lines starting with "At".
_QUOTE_DATE_FORMATS = ('%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S')


def _parse_quote_date(line, date_received):
    """Return the timestamp found in an ``At...`` line.

    A format without a year parses to year 1900; in that case the year of
    *date_received* is substituted.  Raises RuntimeError if no known format
    matches.
    """
    for fmt in _QUOTE_DATE_FORMATS:
        try:
            quotedate = pd.to_datetime(line, format=fmt, exact=False)
        except ValueError:
            continue
        if quotedate.year == 1900:
            quotedate = quotedate.replace(year=date_received.year)
        return quotedate
    raise RuntimeError("can't parse date")


def _store_table(option_stack, fwd_index, header, rows, indextype, email_name):
    """Record one finished strike grid under the expiry of its Ref header.

    A grid with no valid header is reported and dropped, so it can never be
    filed under the expiry of an earlier header.
    """
    if header is None:
        print("skipping quote table without a valid Ref line in {0}".format(
            email_name))
        return
    option_stack[header['expiry']] = makedf(rows, indextype, header['ref'])
    fwd_index.append(header)


def parse_email(email_path):
    """Parse one saved swaption quote email.

    The file is read as bytes.  Its first line is the receive time in
    milliseconds since the epoch, its second line is the subject, and the
    remaining lines are the body.  In the body:

    * a line starting with ``At`` carries the quote timestamp;
    * a line starting with ``Ref`` is the header of the next strike grid;
    * a line starting with ``Strike`` opens a grid, which runs until the
      next blank line or the end of the file.

    Args:
        email_path: ``pathlib.Path``-like object for the saved email.

    Returns:
        A tuple ``(key, option_stack, fwd_index)`` where ``key`` is
        ``(quotedate, indextype, series)`` using the last quote timestamp
        seen, ``option_stack`` maps each expiry to the frame built by
        ``makedf``, and ``fwd_index`` is a DataFrame of the header fields
        indexed by ``quotedate``.

    Raises:
        RuntimeError: If the subject line or an ``At`` line cannot be
            parsed, if a ``Ref`` line appears before any ``At`` line, or if
            the email yields no quote table.
    """
    with email_path.open("rb") as fh:
        date_received = datetime.datetime.fromtimestamp(
            int(fh.readline()) / 1000)
        subject = fh.readline().decode('utf-8')
        m = _SUBJECT_RE.match(subject)
        if m is None:
            raise RuntimeError(
                "can't parse subject line: {0} for email {1}".format(
                    subject, email_path.name))
        indextype, series = m.groups()
        series = int(series)

        quotedate = None   # most recent "At" timestamp
        header = None      # fields of the most recent valid "Ref" line
        in_table = False   # True while reading the rows of a strike grid
        rows = []
        option_stack = {}
        fwd_index = []

        for raw in fh:
            line = raw.decode('utf-8', 'ignore').rstrip()

            if line.startswith("At"):
                quotedate = _parse_quote_date(line, date_received)

            if line.startswith("Ref"):
                m = _REF_RE.match(line)
                if m is None:
                    print("something wrong with {0}".format(email_path.name))
                    # Forget the previous header so the grid that follows is
                    # not stored under the wrong expiry.
                    header = None
                    continue
                if quotedate is None:
                    raise RuntimeError(
                        "no quote date before Ref line in email {0}".format(
                            email_path.name))
                header = m.groupdict()
                header['quotedate'] = quotedate
                header['index'] = indextype
                header['series'] = series
                header['expiry'] = pd.to_datetime(header['expiry'],
                                                  format='%d-%b-%y')
                continue

            if line.startswith("Strike"):
                in_table = True
                rows = []
                continue

            if not in_table:
                continue

            if line:
                # "/" and "|" separate bid/offer pairs and column groups;
                # treat them like whitespace and split on any run of it.
                rows.append(re.sub("[/|]", " ", line).split())
            else:
                # A blank line closes the current grid.
                _store_table(option_stack, fwd_index, header, rows,
                             indextype, email_path.name)
                in_table = False
                rows = []

        # The last grid may run to the end of the file with no blank line.
        if in_table:
            _store_table(option_stack, fwd_index, header, rows,
                         indextype, email_path.name)

    if not option_stack:
        raise RuntimeError("empty email: {0}".format(email_path.name))
    fwd_index = pd.DataFrame.from_records(fwd_index, index='quotedate')
    return (quotedate, indextype, series), option_stack, fwd_index
if __name__ == "__main__":
    # NOTE(review): update_emails comes from download_emails and is not
    # visible here; presumably it refreshes the files under the data
    # directory read below -- confirm.
    update_emails()
    emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]

    swaption_stack = {}
    index_frames = []
    for f in emails:
        try:
            key, option_stack, fwd_index = parse_email(f)
        except RuntimeError as e:
            # Unparseable emails are reported and skipped.
            print(e)
        else:
            swaption_stack[key] = pd.concat(option_stack,
                                            names=['expiry', 'strike'])
            index_frames.append(fwd_index)

    if not swaption_stack:
        # Stop before opening the store in 'w' mode, which would truncate
        # any existing file.
        raise SystemExit("no swaption emails could be parsed")

    # One concat instead of repeated DataFrame.append, which was removed in
    # pandas 2.0 and copied the accumulated frame on every call.
    index_data = pd.concat(index_frames)
    for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
        index_data[col] = index_data[col].astype('float')
    index_data['index'] = index_data['index'].astype('category')

    swaption_stack = pd.concat(swaption_stack,
                               names=['quotedate', 'indextype', 'series'])

    with pd.HDFStore('swaptions.hdf', mode='w', complevel=4,
                     complib='blosc', fletcher32=True) as swaptions:
        swaptions.append('swaptions', swaption_stack)
        swaptions.append('index_data', index_data)
|