aboutsummaryrefslogtreecommitdiffstats
path: root/python/exploration/dispersion.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/exploration/dispersion.py')
-rw-r--r--python/exploration/dispersion.py83
1 files changed, 83 insertions, 0 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
new file mode 100644
index 00000000..01f29a89
--- /dev/null
+++ b/python/exploration/dispersion.py
@@ -0,0 +1,83 @@
+import datetime
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+import statsmodels.formula.api as smf
+
+from analytics.basket_index import MarkitBasketIndex
+from analytics import CreditIndex
+from dateutil.relativedelta import relativedelta
+from utils.db import dbengine
+
+
+def get_dispersion(index_type, series, end_date=datetime.date.today()):
+ index = MarkitBasketIndex(index_type, series, ["5yr"])
+ dr = pd.bdate_range(index.issue_date, end_date)
+
+ dispersion = []
+ for d in dr:
+ index.value_date = d
+ dispersion.append(np.log(index.spreads()).std())
+
+ return pd.Series(dispersion, index=dr, name="dispersion")
+
+
+def add_cumloss(df, index_type, series, engine):
+ cumloss = pd.read_sql_query(
+ "SELECT lastdate, cumulativeloss AS cumloss FROM index_version "
+ "WHERE index=%s and series=%s order by lastdate",
+ engine,
+ params=(index_type, series),
+ parse_dates=["lastdate"],
+ )
+ cumloss.iat[-1, 0] = pd.Timestamp.max
+ cumloss = (
+ cumloss.set_index("lastdate").sort_index().reindex(df.index, method="bfill")
+ )
+ return df.join(cumloss)
+
+
+def get_corr_data(index_type, series, engine):
+ sql_str = (
+ "SELECT quotedate::date, indexrefspread, indexrefprice, index_duration, "
+ "index_expected_loss, corr_at_detach "
+ "FROM tranche_risk JOIN tranche_quotes "
+ "ON tranche_risk.tranche_id=tranche_quotes.id "
+ "WHERE index=%s and series=%s and tenor='5yr' and detach=%s order by quotedate desc"
+ )
+ df = pd.read_sql_query(
+ sql_str,
+ engine,
+ params=(index_type, series, 3 if index_type == "IG" else 15),
+ index_col=["quotedate"],
+ parse_dates=["quotedate"],
+ )
+ if index_type == "HY":
+ spread_equivalent = []
+ index = CreditIndex(index_type, series, "5yr")
+ for k, v in df.iterrows():
+ index.value_date = k
+ index.ref = v["indexrefprice"]
+ spread_equivalent.append(index.spread)
+ df["indexrefspread"] = spread_equivalent
+ df = df.assign(
+ fisher=lambda x: 0.5 * np.log((1 + x.corr_at_detach) / (1 - x.corr_at_detach))
+ )
+ return df
+
+
+if __name__ == "__main__":
+ index_type = "HY"
+ series = 29
+ serenitas_engine = dbengine("serenitasdb")
+ dispersion = get_dispersion(index_type, series)
+ df = get_corr_data(index_type, series, serenitas_engine)
+ df = df.join(dispersion)
+ if index_type == "HY":
+ df = add_cumloss(df, index_type, series, serenitas_engine)
+
+ if index_type == "HY":
+ formula = "fisher ~ np.log(dispersion) + cumloss + np.log(index_duration)"
+ else:
+ formula = "fisher ~ np.log(dispersion) + np.log(indexrefspread) + np.log(index_duration)"
+ mod = smf.ols(formula=formula, data=df)