diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_emails.py | 75 |
1 files changed, 75 insertions, 0 deletions
"""Parse swaption quote emails from ``quotes/`` into a single HDF5 table.

Each file in ``quotes/`` is read as one email: the first line is the subject
(index type and series), an ``At`` line carries the quote timestamp, each
``Ref:`` line introduces an expiry, and each ``Strike`` header starts a quote
table that runs until the next blank line (or end of file).
"""
import re
from datetime import datetime
from pathlib import Path

import pandas as pd

QUOTES_DIR = Path("quotes")

IG_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
              'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
HY_COLUMNS = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
              'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']
# Columns quoted with a trailing "%" that are stored as fractions.
PERCENT_COLUMNS = ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']

SUBJECT_RE = re.compile(r"(?:Fwd:)?(\w{2})(\S+)\s")
REF_RE = re.compile(
    r"Ref:(\S+)\s+Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)")
SEPARATOR_RE = re.compile(r"[/|]")
SPACES_RE = re.compile(r" +")

CUTOFF_DATE = pd.to_datetime('2015-04-01')


def makedf(r, indextype, ref):
    """Build a quote table indexed by strike from already-split rows.

    Args:
        r: sequence of rows, each a list of string cells in column order.
        indextype: ``'IG'`` selects the 9-column layout; anything else
            selects the 10-column layout that includes ``PxVol``.
        ref: reference level stored in every row of the ``ref`` column.

    Returns:
        A DataFrame indexed by ``Strike``. Percentage columns are divided by
        100 and every other column that holds numbers is made numeric.
    """
    cols = IG_COLUMNS if indextype == 'IG' else HY_COLUMNS
    df = pd.DataFrame.from_records(r, columns=cols)
    df['ref'] = ref
    for col in PERCENT_COLUMNS:
        if col in df:
            df[col] = df[col].str.strip("%").astype('float') / 100
    # Replacement for the removed DataFrame.convert_objects(convert_numeric=
    # True): coerce text columns to numbers, but leave a column untouched
    # when nothing in it converts.
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        converted = pd.to_numeric(df[col], errors='coerce')
        if not converted.isna().all():
            df[col] = converted
    df.set_index('Strike', inplace=True)
    return df


def _parse_subject(subject):
    """Return ``(indextype, series)`` from a subject line, or None."""
    m = SUBJECT_RE.match(subject)
    if m is None:
        return None
    indextype, series = m.groups()
    try:
        return indextype, int(series)
    except ValueError:
        return None


def _split_row(line):
    """Split one quote row into cells, treating '/' and '|' as spaces."""
    line = SEPARATOR_RE.sub(" ", line)
    return SPACES_RE.sub(" ", line).rstrip().split(" ")


def _store_table(tables, rows, indextype, ref, expiry, source):
    """Add a finished quote table to *tables* under its expiry."""
    if expiry is None:
        # The Ref line for this table did not parse; storing it would file
        # the quotes under whatever expiry was seen previously.
        print("something wrong with {0}".format(source))
        return
    tables[expiry] = makedf(rows, indextype, ref)


def parse_email(lines, source="<email>"):
    """Parse one quote email.

    Args:
        lines: iterable of raw (bytes) lines; the first one is the subject.
        source: label used in diagnostic messages (typically the file path).

    Returns:
        ``((quotedate, indextype, series), DataFrame)`` where the frame is
        indexed by ``(expiry, Strike)``, or None when the email cannot be
        parsed or contains no quote table.
    """
    lines = iter(lines)
    subject = next(lines, b"").decode('utf-8', 'ignore')
    parsed = _parse_subject(subject)
    if parsed is None:
        # Skip the file instead of reusing index/series from a previous one.
        print("can't parse subject line for {0}".format(source))
        return None
    indextype, series = parsed
    if indextype == 'HY' and series == 24:
        print('{0}'.format(source))

    quotedate = None
    ref = expiry = None
    in_table = False
    rows = []
    allexpiriesdf = {}
    for raw in lines:
        line = raw.decode('utf-8', 'ignore').rstrip()
        if line.startswith("At"):
            quotedate = pd.to_datetime(line[4:])
            # NOTE(review): kept as in the original; the intent of this
            # cutoff is unclear since it only skips the current line.
            if quotedate >= CUTOFF_DATE:
                continue
        if line.startswith("Ref"):
            m = REF_RE.match(line)
            if m:
                ref, _fwspread, _fwbpv, expiry = m.groups()
                expiry = datetime.strptime(expiry, '%d-%b-%y')
            else:
                print("something wrong with {0}".format(source))
                ref = expiry = None
            continue
        if line.startswith("Strike"):
            # The table header, not the subject, decides the column layout.
            indextype = 'HY' if "Px Vol" in line else 'IG'
            in_table = True
            rows = []
            continue
        if in_table:
            if line:
                rows.append(_split_row(line))
            else:
                # A blank line terminates the current table.
                _store_table(allexpiriesdf, rows, indextype, ref, expiry,
                             source)
                in_table = False
                rows = []
    if in_table:
        # The file ended without a blank line after the last table.
        _store_table(allexpiriesdf, rows, indextype, ref, expiry, source)

    if quotedate is None or not allexpiriesdf:
        print("no quotes found in {0}".format(source))
        return None
    key = (quotedate, indextype, series)
    return key, pd.concat(allexpiriesdf, names=['expiry', 'Strike'])


def main():
    """Parse every file in ``quotes/`` and write ``swaptions.hdf``."""
    emails = [f for f in QUOTES_DIR.iterdir() if f.is_file()]
    masterdf = {}
    for f in emails:
        with f.open("rb") as fh:
            parsed = parse_email(fh, f)
        if parsed is None:
            continue
        key, quotes = parsed
        masterdf[key] = quotes
    if not masterdf:
        print("no quotes parsed; nothing written")
        return
    # Level names follow the order of the dict keys built in parse_email.
    masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
    masterdf.to_hdf('swaptions.hdf', key='swaptions')


if __name__ == "__main__":
    main()
