aboutsummaryrefslogtreecommitdiffstats
path: root/python/parse_gs.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_gs.py')
-rw-r--r--python/parse_gs.py152
1 files changed, 0 insertions, 152 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py
deleted file mode 100644
index ffbd038f..00000000
--- a/python/parse_gs.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import pandas as pd
-import pdb
-import re
-import os
-
-data_dir = "/home/share/guillaume/swaptions"
-all_df = {}
-fwd_index = []
-for index in ["IG", "HY"]:
- full_path = os.path.join(data_dir, index + " swaptions")
- for f in os.listdir(os.path.join(data_dir, index + " swaptions")):
- with open(os.path.join(full_path, f), "rb") as fh:
- flag = False
- masterdf = {}
- for line in fh:
- line = line.decode("utf-8", "ignore")
- line = line.rstrip()
- m = re.search(
- "(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$",
- line,
- )
- if m:
- indextype = m.groups()[0]
- series = int(m.groups()[1])
- if indextype == "HY":
- refprice, refspread = map(
- float,
- re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups(),
- )
- else:
- refspread = float(m.groups()[2])
- continue
- if line.startswith("At"):
- quotedate = pd.to_datetime(line[4:])
- continue
- if line.startswith("Expiry"):
- m = re.match(
- "Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line
- )
- if m:
- date, fwprice, fwspread = m.groups()
- date = pd.to_datetime(date, format="%d%b%y")
- continue
- if line.startswith("Stk"):
- flag = True
- r = []
- continue
- if flag:
- if line:
- vals = re.sub(" +", " ", line).split(" ")
- if indextype == "HY":
- vals.pop(2)
- vals.pop(9)
- else:
- vals.pop(1)
- vals.pop(8)
- r.append(vals)
- continue
- else:
- if indextype == "HY":
- cols = [
- "Strike",
- "Sprd",
- "Pay",
- "DeltaPay",
- "Rec",
- "Vol",
- "VolChg",
- "VolBpd",
- "Tail",
- ]
- else:
- cols = [
- "Strike",
- "Pay",
- "DeltaPay",
- "Rec",
- "Vol",
- "VolChg",
- "VolBpd",
- "Tail",
- ]
- df = pd.DataFrame.from_records(r, columns=cols)
-
- df[["PayBid", "PayOffer"]] = df.Pay.str.split("/", expand=True)
- df[["RecBid", "RecOffer"]] = df.Rec.str.split("/", expand=True)
- df.drop(["Pay", "Rec"], axis=1, inplace=True)
- for col in df:
- df[col] = pd.to_numeric(df[col], errors="coerce")
- df.set_index("Strike", inplace=True)
- d = {
- "quotedate": quotedate,
- "expiry": date,
- "index": indextype,
- "series": series,
- "ref": refspread if indextype == "IG" else refprice,
- }
- if indextype == "IG":
- d["fwdspread"] = float(fwspread)
- else:
- d["fwdprice"] = float(fwprice)
- fwd_index.append(d)
-
- masterdf[date] = df
- flag = False
- r = []
- continue
- all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=["expiry"])
-all_df = pd.concat(all_df, names=["quotedate", "index", "series"])
-all_df["DeltaPay"] = -all_df["DeltaPay"] / 100
-all_df["Vol"] /= 100
-index_df = pd.DataFrame(fwd_index)
-
-all_df.reset_index(inplace=True)
-all_df = all_df.rename(
- columns={
- "Strike": "strike",
- "Vol": "vol",
- "PayOffer": "pay_offer",
- "PayBid": "pay_bid",
- "RecOffer": "rec_offer",
- "RecBid": "rec_bid",
- "Tail": "tail",
- "DeltaPay": "delta_pay",
- }
-)
-del all_df["VolBpd"], all_df["VolChg"]
-if "Sprd" in all_df:
- del all_df["Sprd"]
-all_df["quote_source"] = "GS"
-index_df = index_df.drop_duplicates(["quotedate", "index", "series", "expiry"])
-
-##insert quotes
-from utils.db import dbengine
-
-serenitasdb = dbengine("serenitasdb")
-conn = serenitasdb.raw_connection()
-format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " "ON CONFLICT DO NOTHING"
-cols = index_df.columns
-sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols)))
-with conn.cursor() as c:
- c.executemany(sqlstr, index_df.itertuples(index=False))
-conn.commit()
-
-
-format_str = "INSERT INTO swaption_quotes({}) VALUES({}) ON CONFLICT DO NOTHING"
-cols = all_df.columns
-sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols)))
-with conn.cursor() as c:
- c.executemany(sqlstr, all_df.itertuples(index=False))
-conn.commit()
-conn.close()