diff options
Diffstat (limited to 'python/parse_gs_exchange.py')
| -rw-r--r-- | python/parse_gs_exchange.py | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/python/parse_gs_exchange.py b/python/parse_gs_exchange.py new file mode 100644 index 00000000..9194a2a5 --- /dev/null +++ b/python/parse_gs_exchange.py @@ -0,0 +1,144 @@ +from db import dbconn +from exchangelib import Credentials, Mailbox, Configuration, Account, DELEGATE +from pytz import timezone + +import datetime +import json +import os +import pandas as pd +import re + +def get_msgs(email_address='ghorel@lmcg.com', count=None): + with open(os.path.join('.credentials', email_address + '.json')) as fh: + creds = json.load(fh) + credentials = Credentials(**creds) + config = Configuration(server='autodiscover.lmcg.com', credentials=credentials) + account = Account(primary_smtp_address=email_address, config=config, + autodiscover=False, access_type=DELEGATE) + folder = account.root.get_folder_by_name('GS').get_folder_by_name('Swaptions') + if count: + for msg in folder.all().order_by('-datetime_sent')[:count]: + yield msg + else: + for msg in folder.all().order_by('-datetime_sent'): + yield msg + +def parse_email(email, fwd_index): + m = re.search("(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$", + email.subject) + if m: + indextype, series, ref = m.groups() + series = int(series) + if indextype == 'HY': + refprice, refspread = map(float, + re.match("([\S]+)\s+\(([^)]+)\)", ref).groups()) + else: + refspread = float(ref) + + quotedate = datetime.datetime.fromtimestamp(email.datetime_sent.timestamp(), + timezone('America/New_York')) + flag = False + masterdf = {} + for line in email.body.split("\r\n"): + if line.startswith("Expiry"): + m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) + if m: + date, fwprice, fwspread = m.groups() + date = pd.to_datetime(date, format='%d%b%y') + continue + if line.startswith("Stk"): + flag = True + r = [] + continue + if flag: + if line: + vals = re.sub(" +", " ", line).split(" ") + if indextype == 'HY': + vals.pop(2) + vals.pop(9) + else: + vals.pop(1) + vals.pop(8) + r.append(vals) + continue + else: + if indextype=='HY': + cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', + 'VolChg', 'VolBpd', 'Tail'] + else: + cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', + 'VolChg', 'VolBpd', 'Tail'] + df = pd.DataFrame.from_records(r, columns = cols) + + df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) + df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) + df.drop(['Pay', 'Rec'], axis=1, inplace=True) + for col in df: + df[col] = pd.to_numeric(df[col], errors = 'coerce') + df.set_index('Strike', inplace=True) + d = {'quotedate': quotedate, + 'expiry': date, + 'index': indextype, + 'series': series, + 'ref': refspread if indextype =="IG" else refprice} + if indextype == "IG": + d['fwdspread'] = float(fwspread) + else: + d['fwdprice'] = float(fwprice) + fwd_index.append(d) + + masterdf[date] = df + flag = False + r = [] + continue + return (quotedate, indextype, series, pd.concat(masterdf, names=['expiry'])) + +def clean_df(all_df): + all_df = pd.concat(all_df, names=['quotedate', 'index', 'series']) + all_df['DeltaPay'] = - all_df['DeltaPay']/100 + all_df['Vol'] /= 100 + all_df.reset_index(inplace=True) + all_df = all_df.rename(columns={'Strike':'strike', + 'Vol': 'vol', + 'PayOffer': 'pay_offer', + 'PayBid': 'pay_bid', + 'RecOffer': 'rec_offer', + 'RecBid': 'rec_bid', + 'Tail': 'tail', + 'DeltaPay': 'delta_pay'}) + del all_df['VolBpd'], all_df['VolChg'] + if 'Sprd' in all_df: + del all_df['Sprd'] + all_df['quote_source'] = 'GS' + return all_df + +def insert_data(swaption_quotes, index_df): + conn = dbconn('serenitasdb') + format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " \ + "ON CONFLICT DO NOTHING" + sqlstr = format_str.format(",".join(index_df.columns), + ",".join(["%s"] * len(index_df.columns))) + with conn.cursor() as c: + c.executemany(sqlstr, index_df.itertuples(index=False)) + conn.commit() + + format_str = "INSERT INTO swaption_quotes({}) VALUES({}) " \ + "ON CONFLICT DO NOTHING" + sqlstr = format_str.format(",".join(swaption_quotes.columns), + ",".join(["%s"] * len(swaption_quotes.columns))) + with conn.cursor() as c: + c.executemany(sqlstr, swaption_quotes.itertuples(index=False)) + conn.commit() + conn.close() + +if __name__=="__main__": + fwd_index = [] + swaption_quotes = {} + for email in get_msgs(count=20): + quotedate, indextype, series, df = parse_email(email, fwd_index) + swaption_quotes[(quotedate, indextype, series)] = df + + swaption_quotes = clean_df(swaption_quotes) + index_df = pd.DataFrame(fwd_index) + index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) + insert_data(swaption_quotes, index_df) |
