aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/parse_gs_exchange.py144
1 files changed, 144 insertions, 0 deletions
diff --git a/python/parse_gs_exchange.py b/python/parse_gs_exchange.py
new file mode 100644
index 00000000..9194a2a5
--- /dev/null
+++ b/python/parse_gs_exchange.py
@@ -0,0 +1,144 @@
+from db import dbconn
+from exchangelib import Credentials, Mailbox, Configuration, Account, DELEGATE
+from pytz import timezone
+
+import datetime
+import json
+import os
+import pandas as pd
+import re
+
+def get_msgs(email_address='ghorel@lmcg.com', count=None):
+ with open(os.path.join('.credentials', email_address + '.json')) as fh:
+ creds = json.load(fh)
+ credentials = Credentials(**creds)
+ config = Configuration(server='autodiscover.lmcg.com', credentials=credentials)
+ account = Account(primary_smtp_address=email_address, config=config,
+ autodiscover=False, access_type=DELEGATE)
+ folder = account.root.get_folder_by_name('GS').get_folder_by_name('Swaptions')
+ if count:
+ for msg in folder.all().order_by('-datetime_sent')[:count]:
+ yield msg
+ else:
+ for msg in folder.all().order_by('-datetime_sent'):
+ yield msg
+
+def parse_email(email, fwd_index):
+ m = re.search("(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$",
+ email.subject)
+ if m:
+ indextype, series, ref = m.groups()
+ series = int(series)
+ if indextype == 'HY':
+ refprice, refspread = map(float,
+ re.match("([\S]+)\s+\(([^)]+)\)", ref).groups())
+ else:
+ refspread = float(ref)
+
+ quotedate = datetime.datetime.fromtimestamp(email.datetime_sent.timestamp(),
+ timezone('America/New_York'))
+ flag = False
+ masterdf = {}
+ for line in email.body.split("\r\n"):
+ if line.startswith("Expiry"):
+ m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
+ if m:
+ date, fwprice, fwspread = m.groups()
+ date = pd.to_datetime(date, format='%d%b%y')
+ continue
+ if line.startswith("Stk"):
+ flag = True
+ r = []
+ continue
+ if flag:
+ if line:
+ vals = re.sub(" +", " ", line).split(" ")
+ if indextype == 'HY':
+ vals.pop(2)
+ vals.pop(9)
+ else:
+ vals.pop(1)
+ vals.pop(8)
+ r.append(vals)
+ continue
+ else:
+ if indextype=='HY':
+ cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
+ 'VolChg', 'VolBpd', 'Tail']
+ else:
+ cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
+ 'VolChg', 'VolBpd', 'Tail']
+ df = pd.DataFrame.from_records(r, columns = cols)
+
+ df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
+ df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
+ df.drop(['Pay', 'Rec'], axis=1, inplace=True)
+ for col in df:
+ df[col] = pd.to_numeric(df[col], errors = 'coerce')
+ df.set_index('Strike', inplace=True)
+ d = {'quotedate': quotedate,
+ 'expiry': date,
+ 'index': indextype,
+ 'series': series,
+ 'ref': refspread if indextype =="IG" else refprice}
+ if indextype == "IG":
+ d['fwdspread'] = float(fwspread)
+ else:
+ d['fwdprice'] = float(fwprice)
+ fwd_index.append(d)
+
+ masterdf[date] = df
+ flag = False
+ r = []
+ continue
+ return (quotedate, indextype, series, pd.concat(masterdf, names=['expiry']))
+
+def clean_df(all_df):
+ all_df = pd.concat(all_df, names=['quotedate', 'index', 'series'])
+ all_df['DeltaPay'] = - all_df['DeltaPay']/100
+ all_df['Vol'] /= 100
+ all_df.reset_index(inplace=True)
+ all_df = all_df.rename(columns={'Strike':'strike',
+ 'Vol': 'vol',
+ 'PayOffer': 'pay_offer',
+ 'PayBid': 'pay_bid',
+ 'RecOffer': 'rec_offer',
+ 'RecBid': 'rec_bid',
+ 'Tail': 'tail',
+ 'DeltaPay': 'delta_pay'})
+ del all_df['VolBpd'], all_df['VolChg']
+ if 'Sprd' in all_df:
+ del all_df['Sprd']
+ all_df['quote_source'] = 'GS'
+ return all_df
+
+def insert_data(swaption_quotes, index_df):
+ conn = dbconn('serenitasdb')
+ format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " \
+ "ON CONFLICT DO NOTHING"
+ sqlstr = format_str.format(",".join(index_df.columns),
+ ",".join(["%s"] * len(index_df.columns)))
+ with conn.cursor() as c:
+ c.executemany(sqlstr, index_df.itertuples(index=False))
+ conn.commit()
+
+ format_str = "INSERT INTO swaption_quotes({}) VALUES({}) " \
+ "ON CONFLICT DO NOTHING"
+ sqlstr = format_str.format(",".join(swaption_quotes.columns),
+ ",".join(["%s"] * len(swaption_quotes.columns)))
+ with conn.cursor() as c:
+ c.executemany(sqlstr, swaption_quotes.itertuples(index=False))
+ conn.commit()
+ conn.close()
+
+if __name__=="__main__":
+ fwd_index = []
+ swaption_quotes = {}
+ for email in get_msgs(count=20):
+ quotedate, indextype, series, df = parse_email(email, fwd_index)
+ swaption_quotes[(quotedate, indextype, series)] = df
+
+ swaption_quotes = clean_df(swaption_quotes)
+ index_df = pd.DataFrame(fwd_index)
+ index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
+ insert_data(swaption_quotes, index_df)