diff options
| -rw-r--r-- | python/parse_emails.py | 132 |
1 files changed, 67 insertions, 65 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index b49b8bdc..3b25cf20 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -2,8 +2,7 @@ import pandas as pd import re from pathlib import Path import pdb - -emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()] +from download_emails import update_emails def makedf(r, indextype, ref): if indextype=='IG': @@ -20,72 +19,75 @@ def makedf(r, indextype, ref): for k in df: if df.dtypes[k]=='object': df[k] = pd.to_numeric(df[k]) - df.set_index('Strike', inplace=True) + df.set_index('strike', inplace=True) return df -masterdf = {} -for f in emails: - with f.open("rb") as fh: - subject = fh.readline() - m = re.match("(?:Fwd:)?(\w{2})([0-9]{1,2})\s", subject.decode('utf-8')) - if m: - indextype, series = m.groups() - series = int(series) - else: - print("can't parse subject line for {0}".format(f)) - print(subject.decode("utf-8")) - continue - flag = False - allexpiriesdf = {} - for line in fh: - line = line.decode('utf-8', 'ignore') - line = line.rstrip() - if line.startswith("At"): - for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']: - try: - quotedate = pd.to_datetime(line, format=p, exact=False) - except ValueError: - continue - else: - if quotedate.year == 1900: - quotedate = quotedate.replace(year=2015) - break - else: - pdb.set_trace() - if line.startswith("Ref"): - m = re.match("Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)", - line) - if m: - if len(m.groups())==4: - ref, fwspread, fwfwbpv, expiry = m.groups() - elif len(m.groups())==5: - ref, fwprice, fwspread, fwfwbpv, expiry = m.groups() - else: - print("something wrong with {0}".format(f)) - expiry = pd.datetime.strptime(expiry, '%d-%b-%y') +if __name__=="__main__": + update_emails() + emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()] + masterdf = {} + for f in emails: + with f.open("rb") as fh: + subject = fh.readline() + m = re.match("(?:Fwd:)?(\w{2})([0-9]{1,2})\s", subject.decode('utf-8')) + if m: + indextype, series = m.groups() + series = int(series) + else: + print("can't parse subject line for {0}".format(f)) + print(subject.decode("utf-8")) continue - if line.startswith("Strike"): - if "Px Vol" in line: - indextype='HY' - else: - indextype='IG' - flag = True - r = [] - continue - if flag: - if line: - line = re.sub("[/|]", " ", line) - vals = re.sub(" +", " ", line).rstrip().split(" ") - r.append(vals) + flag = False + allexpiriesdf = {} + for line in fh: + line = line.decode('utf-8', 'ignore') + line = line.rstrip() + if line.startswith("At"): + for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']: + try: + quotedate = pd.to_datetime(line, format=p, exact=False) + except ValueError: + continue + else: + if quotedate.year == 1900: + quotedate = quotedate.replace(year=2015) + break + else: + pdb.set_trace() + if line.startswith("Ref"): + m = re.match("Ref:(\S+)\s+(?:Fwd Px:(\S+)\s+)?Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)", + line) + if m: + if len(m.groups())==4: + ref, fwspread, fwfwbpv, expiry = m.groups() + elif len(m.groups())==5: + ref, fwprice, fwspread, fwfwbpv, expiry = m.groups() + else: + print("something wrong with {0}".format(f)) + expiry = pd.datetime.strptime(expiry, '%d-%b-%y') continue - else: - allexpiriesdf[expiry] = makedf(r, indextype, ref) - flag = False + if line.startswith("Strike"): + if "Px Vol" in line: + indextype='HY' + else: + indextype='IG' + flag = True r = [] continue - if flag: - allexpiriesdf[expiry] = makedf(r, indextype, ref) - if allexpiriesdf: - masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'Strike']) -masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series']) -masterdf.to_hdf('swaptions.hdf', key='swaptions') + if flag: + if line: + line = re.sub("[/|]", " ", line) + vals = re.sub(" +", " ", line).rstrip().split(" ") + r.append(vals) + continue + else: + allexpiriesdf[expiry] = makedf(r, indextype, ref) + flag = False + r = [] + continue + if flag: + allexpiriesdf[expiry] = makedf(r, indextype, ref) + if allexpiriesdf: + masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'strike']) + masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series']) + masterdf.to_hdf('swaptions.hdf', key='swaptions') |
