from db import dbconn from exchange import get_msgs from pytz import timezone from parse_emails import write_todb import datetime import logging import pandas as pd import re class ParseError(Exception): pass def parse_email(email, fwd_index): m = re.search("(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$", email.subject) if m: indextype, series, ref = m.groups() series = int(series) if indextype == 'HY': refprice, refspread = map(float, re.match("([\S]+)\s+\(([^)]+)\)", ref).groups()) else: refspread = float(ref) else: raise ParseError(f"can't parse subject line: {email.subject}") quotedate = datetime.datetime.fromtimestamp(email.datetime_sent.timestamp(), timezone('America/New_York')) flag = False masterdf = {} for line in email.body.split("\r\n"): if line.startswith("Expiry"): m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) if m: date, fwprice, fwspread = m.groups() date = pd.to_datetime(date, format='%d%b%y') continue if line.startswith("Stk"): flag = True r = [] continue if flag: if line: vals = re.sub(" +", " ", line).split(" ") if indextype == 'HY': vals.pop(2) vals.pop(9) else: vals.pop(1) vals.pop(8) r.append(vals) continue else: if indextype=='HY': cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] else: cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] df = pd.DataFrame.from_records(r, columns = cols) df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) df.drop(['Pay', 'Rec'], axis=1, inplace=True) for col in df: df[col] = pd.to_numeric(df[col], errors = 'coerce') df.set_index('Strike', inplace=True) d = {'quotedate': quotedate, 'expiry': date, 'index': indextype, 'series': series, 'ref': refspread if indextype =="IG" else refprice} if indextype == "IG": d['fwdspread'] = float(fwspread) else: d['fwdprice'] = float(fwprice) fwd_index.append(d) masterdf[date] = df flag = False r = [] continue return (quotedate, indextype, series, pd.concat(masterdf, names=['expiry'])) def clean_df(all_df): all_df = pd.concat(all_df, names=['quotedate', 'index', 'series']) all_df['DeltaPay'] = - all_df['DeltaPay']/100 all_df['Vol'] /= 100 all_df.reset_index(inplace=True) all_df = all_df.rename(columns={'Strike':'strike', 'Vol': 'vol', 'PayOffer': 'pay_offer', 'PayBid': 'pay_bid', 'RecOffer': 'rec_offer', 'RecBid': 'rec_bid', 'Tail': 'tail', 'DeltaPay': 'delta_pay'}) del all_df['VolBpd'], all_df['VolChg'] if 'Sprd' in all_df: del all_df['Sprd'] return all_df if __name__=="__main__": fwd_index = [] swaption_quotes = {} for email in get_msgs(count=20): try: quotedate, indextype, series, df = parse_email(email, fwd_index) except ParseError as e: logging.exception(e) swaption_quotes[(quotedate, indextype, series)] = df index_df = pd.DataFrame(fwd_index) index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) index_df['quote_source'] = 'GS' swaption_quotes = clean_df(swaption_quotes) swaption_quotes = swaption_quotes.set_index(['quotedate', 'index', 'series', 'expiry']) write_todb(swaption_quotes, index_df)