diff options
Diffstat (limited to 'python/parse_gs.py')
| -rw-r--r-- | python/parse_gs.py | 152 |
1 files changed, 0 insertions, 152 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py deleted file mode 100644 index ffbd038f..00000000 --- a/python/parse_gs.py +++ /dev/null @@ -1,152 +0,0 @@ -import pandas as pd -import pdb -import re -import os - -data_dir = "/home/share/guillaume/swaptions" -all_df = {} -fwd_index = [] -for index in ["IG", "HY"]: - full_path = os.path.join(data_dir, index + " swaptions") - for f in os.listdir(os.path.join(data_dir, index + " swaptions")): - with open(os.path.join(full_path, f), "rb") as fh: - flag = False - masterdf = {} - for line in fh: - line = line.decode("utf-8", "ignore") - line = line.rstrip() - m = re.search( - "(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$", - line, - ) - if m: - indextype = m.groups()[0] - series = int(m.groups()[1]) - if indextype == "HY": - refprice, refspread = map( - float, - re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups(), - ) - else: - refspread = float(m.groups()[2]) - continue - if line.startswith("At"): - quotedate = pd.to_datetime(line[4:]) - continue - if line.startswith("Expiry"): - m = re.match( - "Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line - ) - if m: - date, fwprice, fwspread = m.groups() - date = pd.to_datetime(date, format="%d%b%y") - continue - if line.startswith("Stk"): - flag = True - r = [] - continue - if flag: - if line: - vals = re.sub(" +", " ", line).split(" ") - if indextype == "HY": - vals.pop(2) - vals.pop(9) - else: - vals.pop(1) - vals.pop(8) - r.append(vals) - continue - else: - if indextype == "HY": - cols = [ - "Strike", - "Sprd", - "Pay", - "DeltaPay", - "Rec", - "Vol", - "VolChg", - "VolBpd", - "Tail", - ] - else: - cols = [ - "Strike", - "Pay", - "DeltaPay", - "Rec", - "Vol", - "VolChg", - "VolBpd", - "Tail", - ] - df = pd.DataFrame.from_records(r, columns=cols) - - df[["PayBid", "PayOffer"]] = df.Pay.str.split("/", expand=True) - df[["RecBid", "RecOffer"]] = df.Rec.str.split("/", expand=True) - df.drop(["Pay", "Rec"], axis=1, inplace=True) - for col in df: - df[col] = pd.to_numeric(df[col], errors="coerce") - df.set_index("Strike", inplace=True) - d = { - "quotedate": quotedate, - "expiry": date, - "index": indextype, - "series": series, - "ref": refspread if indextype == "IG" else refprice, - } - if indextype == "IG": - d["fwdspread"] = float(fwspread) - else: - d["fwdprice"] = float(fwprice) - fwd_index.append(d) - - masterdf[date] = df - flag = False - r = [] - continue - all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=["expiry"]) -all_df = pd.concat(all_df, names=["quotedate", "index", "series"]) -all_df["DeltaPay"] = -all_df["DeltaPay"] / 100 -all_df["Vol"] /= 100 -index_df = pd.DataFrame(fwd_index) - -all_df.reset_index(inplace=True) -all_df = all_df.rename( - columns={ - "Strike": "strike", - "Vol": "vol", - "PayOffer": "pay_offer", - "PayBid": "pay_bid", - "RecOffer": "rec_offer", - "RecBid": "rec_bid", - "Tail": "tail", - "DeltaPay": "delta_pay", - } -) -del all_df["VolBpd"], all_df["VolChg"] -if "Sprd" in all_df: - del all_df["Sprd"] -all_df["quote_source"] = "GS" -index_df = index_df.drop_duplicates(["quotedate", "index", "series", "expiry"]) - -##insert quotes -from utils.db import dbengine - -serenitasdb = dbengine("serenitasdb") -conn = serenitasdb.raw_connection() -format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " "ON CONFLICT DO NOTHING" -cols = index_df.columns -sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols))) -with conn.cursor() as c: - c.executemany(sqlstr, index_df.itertuples(index=False)) -conn.commit() - - -format_str = "INSERT INTO swaption_quotes({}) VALUES({}) ON CONFLICT DO NOTHING" -cols = all_df.columns -sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols))) -with conn.cursor() as c: - c.executemany(sqlstr, all_df.itertuples(index=False)) -conn.commit() -conn.close() |
