aboutsummaryrefslogtreecommitdiffstats
path: root/python/parse_gs_exchange.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_gs_exchange.py')
-rw-r--r--python/parse_gs_exchange.py137
1 files changed, 0 insertions, 137 deletions
diff --git a/python/parse_gs_exchange.py b/python/parse_gs_exchange.py
deleted file mode 100644
index a77f87c5..00000000
--- a/python/parse_gs_exchange.py
+++ /dev/null
@@ -1,137 +0,0 @@
-from pathlib import Path
-from exchange import get_msgs
-from pytz import timezone
-from parse_emails import write_todb
-import datetime
-import logging
-import os
-import pandas as pd
-import re
-
-
-class ParseError(Exception):
- pass
-
-
-def parse_email(email, fwd_index):
- m = re.search("(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+([\d.]+)(?:[^(]+\(([\d.]+)\))?",
- email.subject)
- if m:
- indextype, series, ref, refspread = m.groups()
- series = int(series)
- if indextype == 'IG':
- refspread = ref
- try:
- refspread = float(ref)
- except ValueError as e:
- raise ParseError(str(e))
- else:
- raise ParseError(f"can't parse subject line: {email.subject}")
-
- quotedate = datetime.datetime.fromtimestamp(email.datetime_sent.timestamp(),
- timezone('America/New_York'))
- flag = False
- masterdf = {}
- for line in email.body.split("\r\n"):
- if line.startswith("Expiry"):
- m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
- if m:
- date, fwprice, fwspread = m.groups()
- date = pd.to_datetime(date, format='%d%b%y')
- continue
- if line.startswith("Stk"):
- flag = True
- r = []
- continue
- if flag:
- if line:
- vals = re.sub(" +", " ", line).split(" ")
- if indextype == 'HY':
- vals.pop(2)
- vals.pop(9)
- else:
- vals.pop(1)
- vals.pop(8)
- r.append(vals)
- continue
- else:
- if indextype == 'HY':
- cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
- 'VolChg', 'VolBpd', 'Tail']
- else:
- cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
- 'VolChg', 'VolBpd', 'Tail']
- df = pd.DataFrame.from_records(r, columns=cols)
-
- df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
- df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
- df.drop(['Pay', 'Rec'], axis=1, inplace=True)
- for col in df:
- df[col] = pd.to_numeric(df[col], errors='coerce')
- df.set_index('Strike', inplace=True)
- d = {'quotedate': quotedate,
- 'expiry': date,
- 'index': indextype,
- 'series': series,
- 'ref': refspread if indextype == "IG" else ref,
- 'msg_id': int(get_msg_id(email), 16)}
- if indextype == "IG":
- d['fwdspread'] = float(fwspread)
- else:
- d['fwdprice'] = float(fwprice)
- fwd_index.append(d)
-
- masterdf[date] = df
- flag = False
- r = []
- continue
- return quotedate, indextype, series, pd.concat(masterdf, names=['expiry'])
-
-
-def clean_df(all_df):
- all_df = pd.concat(all_df, names=['quotedate', 'index', 'series'], sort=True)
- all_df['DeltaPay'] = - all_df['DeltaPay']/100
- all_df['Vol'] /= 100
- all_df.reset_index(inplace=True)
- all_df = all_df.rename(columns={'Strike':'strike',
- 'Vol': 'vol',
- 'PayOffer': 'pay_offer',
- 'PayBid': 'pay_bid',
- 'RecOffer': 'rec_offer',
- 'RecBid': 'rec_bid',
- 'Tail': 'tail',
- 'DeltaPay': 'delta_pay'})
- del all_df['VolBpd'], all_df['VolChg']
- if 'Sprd' in all_df:
- del all_df['Sprd']
- return all_df
-
-def get_msg_id(msg):
- return msg.message_id[1:17].lower()
-
-def save_email(msg, path):
- fname = path / ("{:%Y-%m-%d %H-%M-%S}_{}".
- format(msg.datetime_sent,
- get_msg_id(msg)))
- with fname.open("w") as fh:
- fh.write(msg.text_body)
-
-if __name__ == "__main__":
- fwd_index = []
- swaption_quotes = {}
- save_path = Path(os.environ["DATA_DIR"]) / "swaptions" / "exchange"
- for email in get_msgs(count=20):
- save_email(email, save_path)
- try:
- quotedate, indextype, series, df = parse_email(email, fwd_index)
- except ParseError as e:
- logging.exception(e)
- continue
-
- swaption_quotes[(quotedate, indextype, series)] = df
- index_df = pd.DataFrame(fwd_index)
- index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
- index_df['quote_source'] = 'GS'
- swaption_quotes = clean_df(swaption_quotes)
- swaption_quotes = swaption_quotes.set_index(['quotedate', 'index', 'series', 'expiry'])
- write_todb(swaption_quotes, index_df)