diff options
Diffstat (limited to 'python/parse_gs_exchange.py')
| -rw-r--r-- | python/parse_gs_exchange.py | 137 |
1 files changed, 0 insertions, 137 deletions
"""Parse swaption quote emails and hand the results to ``write_todb``.

Run as a script, this fetches recent messages with ``get_msgs``, saves each
message's text under ``$DATA_DIR/swaptions/exchange``, parses the quote tables
found in the message bodies and passes the quotes, together with one forward
record per expiry, to ``write_todb``.
"""
import datetime
import logging
import os
import re
from pathlib import Path

import pandas as pd
from pytz import timezone

from exchange import get_msgs
from parse_emails import write_todb

# Raw strings: the patterns contain ``\d``, ``\D``, ``\(``, ``\S`` and ``\w``,
# which are invalid escape sequences inside ordinary string literals.
_SUBJECT_RE = re.compile(
    r"(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+([\d.]+)"
    r"(?:[^(]+\(([\d.]+)\))?"
)
_EXPIRY_RE = re.compile(r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)")
_SPACES_RE = re.compile(" +")

# Column names of a quote row once the two ignored tokens have been removed.
_COLUMNS = {
    "HY": ["Strike", "Sprd", "Pay", "DeltaPay", "Rec", "Vol",
           "VolChg", "VolBpd", "Tail"],
    "IG": ["Strike", "Pay", "DeltaPay", "Rec", "Vol",
           "VolChg", "VolBpd", "Tail"],
}
# Positions of the two tokens dropped from every quote row.  The second
# position is counted *after* the first token has already been removed.
_IGNORED_TOKENS = {"HY": (2, 9), "IG": (1, 8)}

_RENAMED_COLUMNS = {
    "Strike": "strike",
    "Vol": "vol",
    "PayOffer": "pay_offer",
    "PayBid": "pay_bid",
    "RecOffer": "rec_offer",
    "RecBid": "rec_bid",
    "Tail": "tail",
    "DeltaPay": "delta_pay",
}


class ParseError(Exception):
    """Raised when an email cannot be parsed into swaption quotes."""


def _split_row(line, indextype):
    """Split one quote row into the values matching ``_COLUMNS[indextype]``.

    Raises:
        ParseError: if the row does not have the expected number of tokens.
    """
    tokens = _SPACES_RE.sub(" ", line).split(" ")
    columns = _COLUMNS[indextype]
    if len(tokens) != len(columns) + 2:
        raise ParseError(
            f"expected {len(columns) + 2} tokens in quote row, "
            f"got {len(tokens)}: {line!r}"
        )
    first, second = _IGNORED_TOKENS[indextype]
    del tokens[first]
    del tokens[second]
    return tokens


def _quotes_frame(rows, indextype):
    """Build the numeric, strike-indexed quote table of a single expiry."""
    df = pd.DataFrame.from_records(rows, columns=_COLUMNS[indextype])
    # Payer and receiver quotes arrive as "bid/offer" strings.
    df[["PayBid", "PayOffer"]] = df.Pay.str.split("/", expand=True)
    df[["RecBid", "RecOffer"]] = df.Rec.str.split("/", expand=True)
    df = df.drop(columns=["Pay", "Rec"])
    for col in df:
        # Anything that is not a number becomes NaN instead of raising.
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df.set_index("Strike")


def parse_email(email, fwd_index):
    """Parse one swaption quote email.

    The index type, series and reference level come from the subject line.
    The body is scanned for ``Expiry ...`` lines, each followed by a table
    that starts at a line beginning with ``Stk`` and ends at the first blank
    line (or at the end of the body).

    Args:
        email: message object providing ``subject``, ``body``,
            ``datetime_sent`` and ``message_id``.
        fwd_index: list that receives one dict per parsed expiry with the keys
            ``quotedate``, ``expiry``, ``index``, ``series``, ``ref``,
            ``msg_id`` and either ``fwdspread`` (IG) or ``fwdprice`` (HY).
            It is only extended once the whole email has parsed successfully.

    Returns:
        Tuple ``(quotedate, indextype, series, quotes)`` where ``quotes`` is
        the concatenation of the per-expiry tables, keyed by ``expiry``.

    Raises:
        ParseError: if the subject line or any part of the body cannot be
            parsed, or if the body contains no quote table.
    """
    m = _SUBJECT_RE.search(email.subject)
    if m is None:
        raise ParseError(f"can't parse subject line: {email.subject}")
    # The optional value in parentheses (4th group) is captured but not used.
    indextype, series, ref, _ = m.groups()
    series = int(series)
    try:
        # Stored as a number for both index types; the HY reference used to be
        # left as the raw string.
        ref = float(ref)
    except ValueError as e:
        raise ParseError(str(e)) from e

    # Sent time of the message expressed in New York time.
    quotedate = datetime.datetime.fromtimestamp(email.datetime_sent.timestamp(),
                                                timezone('America/New_York'))
    quotes_by_expiry = {}
    records = []

    def close_table(rows, expiry, fwprice, fwspread):
        """Store the table collected in ``rows`` and its forward record."""
        if expiry is None:
            raise ParseError("quote table found before any 'Expiry' line")
        if not rows:
            raise ParseError(f"empty quote table for expiry {expiry}")
        df = _quotes_frame(rows, indextype)
        record = {'quotedate': quotedate,
                  'expiry': expiry,
                  'index': indextype,
                  'series': series,
                  'ref': ref,
                  'msg_id': int(get_msg_id(email), 16)}
        key, forward = (('fwdspread', fwspread) if indextype == "IG"
                        else ('fwdprice', fwprice))
        try:
            record[key] = float(forward)
        except (TypeError, ValueError) as e:
            raise ParseError(
                f"can't parse forward level {forward!r} for expiry {expiry}"
            ) from e
        records.append(record)
        quotes_by_expiry[expiry] = df

    expiry = fwprice = fwspread = None
    rows = None  # None while outside a quote table, a list while inside one
    # splitlines() accepts "\r\n" as well as bare "\n" separators.
    for line in email.body.splitlines():
        if line.startswith("Expiry"):
            m = _EXPIRY_RE.match(line)
            if m:
                raw_expiry, fwprice, fwspread = m.groups()
                try:
                    expiry = pd.to_datetime(raw_expiry, format='%d%b%y')
                except ValueError as e:
                    raise ParseError(f"can't parse expiry {raw_expiry!r}") from e
            continue
        if line.startswith("Stk"):
            rows = []
            continue
        if rows is None:
            continue
        if line.strip():
            rows.append(_split_row(line, indextype))
            continue
        # A blank line ends the current table.
        close_table(rows, expiry, fwprice, fwspread)
        rows = None
    if rows is not None:
        # The body ended right after the last row, without a blank line.
        close_table(rows, expiry, fwprice, fwspread)

    if not quotes_by_expiry:
        raise ParseError("no quote table found in message body")
    fwd_index.extend(records)
    return quotedate, indextype, series, pd.concat(quotes_by_expiry, names=['expiry'])


def clean_df(all_df):
    """Combine the parsed quote tables into one flat, renamed DataFrame.

    Args:
        all_df: dict mapping ``(quotedate, index, series)`` to the table
            returned by ``parse_email``.

    Returns:
        DataFrame with the index levels turned into columns, ``DeltaPay``
        negated and divided by 100, ``Vol`` divided by 100, the columns renamed
        to lower-case names, and ``VolBpd``, ``VolChg`` and (when present)
        ``Sprd`` removed.
    """
    all_df = pd.concat(all_df, names=['quotedate', 'index', 'series'], sort=True)
    all_df['DeltaPay'] = -all_df['DeltaPay'] / 100
    all_df['Vol'] /= 100
    all_df = all_df.reset_index().rename(columns=_RENAMED_COLUMNS)
    all_df = all_df.drop(columns=['VolBpd', 'VolChg'])
    # 'Sprd' only exists when at least one HY table was parsed.
    return all_df.drop(columns=['Sprd'], errors='ignore')


def get_msg_id(msg):
    """Return characters 1 to 16 of ``msg.message_id``, lower-cased.

    The first character is skipped (presumably the opening ``<`` of the
    message id; confirm against the mail source).
    """
    return msg.message_id[1:17].lower()


def save_email(msg, path):
    """Write ``msg.text_body`` to a file in ``path``.

    The file is named after the sent time and the id from ``get_msg_id``.
    """
    fname = path / f"{msg.datetime_sent:%Y-%m-%d %H-%M-%S}_{get_msg_id(msg)}"
    with fname.open("w") as fh:
        fh.write(msg.text_body)


def main():
    """Fetch, save and parse the latest messages, then store the quotes."""
    fwd_index = []
    swaption_quotes = {}
    save_path = Path(os.environ["DATA_DIR"]) / "swaptions" / "exchange"
    for email in get_msgs(count=20):
        save_email(email, save_path)
        try:
            quotedate, indextype, series, df = parse_email(email, fwd_index)
        except ParseError as e:
            logging.exception(e)
            continue
        swaption_quotes[(quotedate, indextype, series)] = df

    if not swaption_quotes:
        # Nothing parsed: the concat and drop_duplicates below would raise.
        logging.warning("no swaption quotes parsed, nothing to write")
        return

    index_df = pd.DataFrame(fwd_index)
    index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
    index_df['quote_source'] = 'GS'
    swaption_quotes = clean_df(swaption_quotes)
    swaption_quotes = swaption_quotes.set_index(['quotedate', 'index', 'series', 'expiry'])
    write_todb(swaption_quotes, index_df)


if __name__ == "__main__":
    main()
