import pandas as pd import pdb import re import os data_dir = "/home/share/guillaume/swaptions" all_df = {} fwd_index = [] for index in ["IG", "HY"]: full_path = os.path.join(data_dir, index + " swaptions") for f in os.listdir(os.path.join(data_dir, index + " swaptions")): with open(os.path.join(full_path, f), "rb") as fh: flag = False masterdf = {} for line in fh: line = line.decode("utf-8", "ignore") line = line.rstrip() m = re.search( "(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$", line, ) if m: indextype = m.groups()[0] series = int(m.groups()[1]) if indextype == "HY": refprice, refspread = map( float, re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups(), ) else: refspread = float(m.groups()[2]) continue if line.startswith("At"): quotedate = pd.to_datetime(line[4:]) continue if line.startswith("Expiry"): m = re.match( "Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line ) if m: date, fwprice, fwspread = m.groups() date = pd.to_datetime(date, format="%d%b%y") continue if line.startswith("Stk"): flag = True r = [] continue if flag: if line: vals = re.sub(" +", " ", line).split(" ") if indextype == "HY": vals.pop(2) vals.pop(9) else: vals.pop(1) vals.pop(8) r.append(vals) continue else: if indextype == "HY": cols = [ "Strike", "Sprd", "Pay", "DeltaPay", "Rec", "Vol", "VolChg", "VolBpd", "Tail", ] else: cols = [ "Strike", "Pay", "DeltaPay", "Rec", "Vol", "VolChg", "VolBpd", "Tail", ] df = pd.DataFrame.from_records(r, columns=cols) df[["PayBid", "PayOffer"]] = df.Pay.str.split("/", expand=True) df[["RecBid", "RecOffer"]] = df.Rec.str.split("/", expand=True) df.drop(["Pay", "Rec"], axis=1, inplace=True) for col in df: df[col] = pd.to_numeric(df[col], errors="coerce") df.set_index("Strike", inplace=True) d = { "quotedate": quotedate, "expiry": date, "index": indextype, "series": series, "ref": refspread if indextype == "IG" else refprice, } if indextype == "IG": d["fwdspread"] = float(fwspread) else: d["fwdprice"] = float(fwprice) fwd_index.append(d) masterdf[date] = df flag = False r = [] continue all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=["expiry"]) all_df = pd.concat(all_df, names=["quotedate", "index", "series"]) all_df["DeltaPay"] = -all_df["DeltaPay"] / 100 all_df["Vol"] /= 100 index_df = pd.DataFrame(fwd_index) all_df.reset_index(inplace=True) all_df = all_df.rename( columns={ "Strike": "strike", "Vol": "vol", "PayOffer": "pay_offer", "PayBid": "pay_bid", "RecOffer": "rec_offer", "RecBid": "rec_bid", "Tail": "tail", "DeltaPay": "delta_pay", } ) del all_df["VolBpd"], all_df["VolChg"] if "Sprd" in all_df: del all_df["Sprd"] all_df["quote_source"] = "GS" index_df = index_df.drop_duplicates(["quotedate", "index", "series", "expiry"]) ##insert quotes from utils.db import dbengine serenitasdb = dbengine("serenitasdb") conn = serenitasdb.raw_connection() format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " "ON CONFLICT DO NOTHING" cols = index_df.columns sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols))) with conn.cursor() as c: c.executemany(sqlstr, index_df.itertuples(index=False)) conn.commit() format_str = "INSERT INTO swaption_quotes({}) VALUES({}) ON CONFLICT DO NOTHING" cols = all_df.columns sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols))) with conn.cursor() as c: c.executemany(sqlstr, all_df.itertuples(index=False)) conn.commit() conn.close()