about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/parse_emails.py 132
1 files changed, 67 insertions, 65 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index b49b8bdc..3b25cf20 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -2,8 +2,7 @@ import pandas as pd
import re
from pathlib import Path
import pdb
-
-emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
+from download_emails import update_emails
def makedf(r, indextype, ref):
if indextype=='IG':
@@ -20,72 +19,75 @@ def makedf(r, indextype, ref):
for k in df:
if df.dtypes[k]=='object':
df[k] = pd.to_numeric(df[k])
- df.set_index('Strike', inplace=True)
+ df.set_index('strike', inplace=True)
return df
-masterdf = {}
-for f in emails:
- with f.open("rb") as fh:
- subject = fh.readline()
- m = re.match("(?:Fwd:)?(\w{2})([0-9]{1,2})\s", subject.decode('utf-8'))
- if m:
- indextype, series = m.groups()
- series = int(series)
- else:
- print("can't parse subject line for {0}".format(f))
- print(subject.decode("utf-8"))
- continue
- flag = False
- allexpiriesdf = {}
- for line in fh:
- line = line.decode('utf-8', 'ignore')
- line = line.rstrip()
- if line.startswith("At"):
- for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']:
- try:
- quotedate = pd.to_datetime(line, format=p, exact=False)
- except ValueError:
- continue
- else:
- if quotedate.year == 1900:
- quotedate = quotedate.replace(year=2015)
- break
- else:
- pdb.set_trace()
- if line.startswith("Ref"):
- m = re.match("Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)",
- line)
- if m:
- if len(m.groups())==4:
- ref, fwspread, fwfwbpv, expiry = m.groups()
- elif len(m.groups())==5:
- ref, fwprice, fwspread, fwfwbpv, expiry = m.groups()
- else:
- print("something wrong with {0}".format(f))
- expiry = pd.datetime.strptime(expiry, '%d-%b-%y')
+if __name__=="__main__":
+ update_emails()
+ emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
+ masterdf = {}
+ for f in emails:
+ with f.open("rb") as fh:
+ subject = fh.readline()
+ m = re.match("(?:Fwd:)?(\w{2})([0-9]{1,2})\s", subject.decode('utf-8'))
+ if m:
+ indextype, series = m.groups()
+ series = int(series)
+ else:
+ print("can't parse subject line for {0}".format(f))
+ print(subject.decode("utf-8"))
continue
- if line.startswith("Strike"):
- if "Px Vol" in line:
- indextype='HY'
- else:
- indextype='IG'
- flag = True
- r = []
- continue
- if flag:
- if line:
- line = re.sub("[/|]", " ", line)
- vals = re.sub(" +", " ", line).rstrip().split(" ")
- r.append(vals)
+ flag = False
+ allexpiriesdf = {}
+ for line in fh:
+ line = line.decode('utf-8', 'ignore')
+ line = line.rstrip()
+ if line.startswith("At"):
+ for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']:
+ try:
+ quotedate = pd.to_datetime(line, format=p, exact=False)
+ except ValueError:
+ continue
+ else:
+ if quotedate.year == 1900:
+ quotedate = quotedate.replace(year=2015)
+ break
+ else:
+ pdb.set_trace()
+ if line.startswith("Ref"):
+ m = re.match("Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)",
+ line)
+ if m:
+ if len(m.groups())==4:
+ ref, fwspread, fwfwbpv, expiry = m.groups()
+ elif len(m.groups())==5:
+ ref, fwprice, fwspread, fwfwbpv, expiry = m.groups()
+ else:
+ print("something wrong with {0}".format(f))
+ expiry = pd.datetime.strptime(expiry, '%d-%b-%y')
continue
- else:
- allexpiriesdf[expiry] = makedf(r, indextype, ref)
- flag = False
+ if line.startswith("Strike"):
+ if "Px Vol" in line:
+ indextype='HY'
+ else:
+ indextype='IG'
+ flag = True
r = []
continue
- if flag:
- allexpiriesdf[expiry] = makedf(r, indextype, ref)
- if allexpiriesdf:
- masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'Strike'])
-masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
-masterdf.to_hdf('swaptions.hdf', key='swaptions')
+ if flag:
+ if line:
+ line = re.sub("[/|]", " ", line)
+ vals = re.sub(" +", " ", line).rstrip().split(" ")
+ r.append(vals)
+ continue
+ else:
+ allexpiriesdf[expiry] = makedf(r, indextype, ref)
+ flag = False
+ r = []
+ continue
+ if flag:
+ allexpiriesdf[expiry] = makedf(r, indextype, ref)
+ if allexpiriesdf:
+ masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'strike'])
+ masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
+ masterdf.to_hdf('swaptions.hdf', key='swaptions')