diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_gs.py | 152 | ||||
| -rw-r--r-- | python/parse_gs_exchange.py | 137 |
2 files changed, 0 insertions, 289 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py deleted file mode 100644 index ffbd038f..00000000 --- a/python/parse_gs.py +++ /dev/null @@ -1,152 +0,0 @@ -import pandas as pd -import pdb -import re -import os - -data_dir = "/home/share/guillaume/swaptions" -all_df = {} -fwd_index = [] -for index in ["IG", "HY"]: - full_path = os.path.join(data_dir, index + " swaptions") - for f in os.listdir(os.path.join(data_dir, index + " swaptions")): - with open(os.path.join(full_path, f), "rb") as fh: - flag = False - masterdf = {} - for line in fh: - line = line.decode("utf-8", "ignore") - line = line.rstrip() - m = re.search( - "(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$", - line, - ) - if m: - indextype = m.groups()[0] - series = int(m.groups()[1]) - if indextype == "HY": - refprice, refspread = map( - float, - re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups(), - ) - else: - refspread = float(m.groups()[2]) - continue - if line.startswith("At"): - quotedate = pd.to_datetime(line[4:]) - continue - if line.startswith("Expiry"): - m = re.match( - "Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line - ) - if m: - date, fwprice, fwspread = m.groups() - date = pd.to_datetime(date, format="%d%b%y") - continue - if line.startswith("Stk"): - flag = True - r = [] - continue - if flag: - if line: - vals = re.sub(" +", " ", line).split(" ") - if indextype == "HY": - vals.pop(2) - vals.pop(9) - else: - vals.pop(1) - vals.pop(8) - r.append(vals) - continue - else: - if indextype == "HY": - cols = [ - "Strike", - "Sprd", - "Pay", - "DeltaPay", - "Rec", - "Vol", - "VolChg", - "VolBpd", - "Tail", - ] - else: - cols = [ - "Strike", - "Pay", - "DeltaPay", - "Rec", - "Vol", - "VolChg", - "VolBpd", - "Tail", - ] - df = pd.DataFrame.from_records(r, columns=cols) - - df[["PayBid", "PayOffer"]] = df.Pay.str.split("/", expand=True) - df[["RecBid", "RecOffer"]] = df.Rec.str.split("/", expand=True) - df.drop(["Pay", "Rec"], axis=1, inplace=True) - for col in df: - df[col] = pd.to_numeric(df[col], errors="coerce") - df.set_index("Strike", inplace=True) - d = { - "quotedate": quotedate, - "expiry": date, - "index": indextype, - "series": series, - "ref": refspread if indextype == "IG" else refprice, - } - if indextype == "IG": - d["fwdspread"] = float(fwspread) - else: - d["fwdprice"] = float(fwprice) - fwd_index.append(d) - - masterdf[date] = df - flag = False - r = [] - continue - all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=["expiry"]) -all_df = pd.concat(all_df, names=["quotedate", "index", "series"]) -all_df["DeltaPay"] = -all_df["DeltaPay"] / 100 -all_df["Vol"] /= 100 -index_df = pd.DataFrame(fwd_index) - -all_df.reset_index(inplace=True) -all_df = all_df.rename( - columns={ - "Strike": "strike", - "Vol": "vol", - "PayOffer": "pay_offer", - "PayBid": "pay_bid", - "RecOffer": "rec_offer", - "RecBid": "rec_bid", - "Tail": "tail", - "DeltaPay": "delta_pay", - } -) -del all_df["VolBpd"], all_df["VolChg"] -if "Sprd" in all_df: - del all_df["Sprd"] -all_df["quote_source"] = "GS" -index_df = index_df.drop_duplicates(["quotedate", "index", "series", "expiry"]) - -##insert quotes -from utils.db import dbengine - -serenitasdb = dbengine("serenitasdb") -conn = serenitasdb.raw_connection() -format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " "ON CONFLICT DO NOTHING" -cols = index_df.columns -sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols))) -with conn.cursor() as c: - c.executemany(sqlstr, index_df.itertuples(index=False)) -conn.commit() - - -format_str = "INSERT INTO swaption_quotes({}) VALUES({}) ON CONFLICT DO NOTHING" -cols = all_df.columns -sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols))) -with conn.cursor() as c: - c.executemany(sqlstr, all_df.itertuples(index=False)) -conn.commit() -conn.close() diff --git a/python/parse_gs_exchange.py b/python/parse_gs_exchange.py deleted file mode 100644 index a77f87c5..00000000 --- a/python/parse_gs_exchange.py +++ /dev/null @@ -1,137 +0,0 @@ -from pathlib import Path -from exchange import get_msgs -from pytz import timezone -from parse_emails import write_todb -import datetime -import logging -import os -import pandas as pd -import re - - -class ParseError(Exception): - pass - - -def parse_email(email, fwd_index): - m = re.search("(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+([\d.]+)(?:[^(]+\(([\d.]+)\))?", - email.subject) - if m: - indextype, series, ref, refspread = m.groups() - series = int(series) - if indextype == 'IG': - refspread = ref - try: - refspread = float(ref) - except ValueError as e: - raise ParseError(str(e)) - else: - raise ParseError(f"can't parse subject line: {email.subject}") - - quotedate = datetime.datetime.fromtimestamp(email.datetime_sent.timestamp(), - timezone('America/New_York')) - flag = False - masterdf = {} - for line in email.body.split("\r\n"): - if line.startswith("Expiry"): - m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) - if m: - date, fwprice, fwspread = m.groups() - date = pd.to_datetime(date, format='%d%b%y') - continue - if line.startswith("Stk"): - flag = True - r = [] - continue - if flag: - if line: - vals = re.sub(" +", " ", line).split(" ") - if indextype == 'HY': - vals.pop(2) - vals.pop(9) - else: - vals.pop(1) - vals.pop(8) - r.append(vals) - continue - else: - if indextype == 'HY': - cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', - 'VolChg', 'VolBpd', 'Tail'] - else: - cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', - 'VolChg', 'VolBpd', 'Tail'] - df = pd.DataFrame.from_records(r, columns=cols) - - df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) - df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) - df.drop(['Pay', 'Rec'], axis=1, inplace=True) - for col in df: - df[col] = pd.to_numeric(df[col], errors='coerce') - df.set_index('Strike', inplace=True) - d = {'quotedate': quotedate, - 'expiry': date, - 'index': indextype, - 'series': series, - 'ref': refspread if indextype == "IG" else ref, - 'msg_id': int(get_msg_id(email), 16)} - if indextype == "IG": - d['fwdspread'] = float(fwspread) - else: - d['fwdprice'] = float(fwprice) - fwd_index.append(d) - - masterdf[date] = df - flag = False - r = [] - continue - return quotedate, indextype, series, pd.concat(masterdf, names=['expiry']) - - -def clean_df(all_df): - all_df = pd.concat(all_df, names=['quotedate', 'index', 'series'], sort=True) - all_df['DeltaPay'] = - all_df['DeltaPay']/100 - all_df['Vol'] /= 100 - all_df.reset_index(inplace=True) - all_df = all_df.rename(columns={'Strike':'strike', - 'Vol': 'vol', - 'PayOffer': 'pay_offer', - 'PayBid': 'pay_bid', - 'RecOffer': 'rec_offer', - 'RecBid': 'rec_bid', - 'Tail': 'tail', - 'DeltaPay': 'delta_pay'}) - del all_df['VolBpd'], all_df['VolChg'] - if 'Sprd' in all_df: - del all_df['Sprd'] - return all_df - -def get_msg_id(msg): - return msg.message_id[1:17].lower() - -def save_email(msg, path): - fname = path / ("{:%Y-%m-%d %H-%M-%S}_{}". - format(msg.datetime_sent, - get_msg_id(msg))) - with fname.open("w") as fh: - fh.write(msg.text_body) - -if __name__ == "__main__": - fwd_index = [] - swaption_quotes = {} - save_path = Path(os.environ["DATA_DIR"]) / "swaptions" / "exchange" - for email in get_msgs(count=20): - save_email(email, save_path) - try: - quotedate, indextype, series, df = parse_email(email, fwd_index) - except ParseError as e: - logging.exception(e) - continue - - swaption_quotes[(quotedate, indextype, series)] = df - index_df = pd.DataFrame(fwd_index) - index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) - index_df['quote_source'] = 'GS' - swaption_quotes = clean_df(swaption_quotes) - swaption_quotes = swaption_quotes.set_index(['quotedate', 'index', 'series', 'expiry']) - write_todb(swaption_quotes, index_df) |
