1 files changed, 83 insertions, 0 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
new file mode 100644
index 00000000..01f29a89
--- /dev/null
+++ b/python/exploration/dispersion.py
@@ -0,0 +1,83 @@
+import datetime
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+import statsmodels.formula.api as smf
+
+from analytics.basket_index import MarkitBasketIndex
+from analytics import CreditIndex
+from dateutil.relativedelta import relativedelta
+from utils.db import dbengine
+
+
+def get_dispersion(index_type, series, end_date=datetime.date.today()):
+    index = MarkitBasketIndex(index_type, series, ["5yr"])
+    dr = pd.bdate_range(index.issue_date, end_date)
+
+    dispersion = []
+    for d in dr:
+        index.value_date = d
+        dispersion.append(np.log(index.spreads()).std())
+
+    return pd.Series(dispersion, index=dr, name="dispersion")
+
+
+def add_cumloss(df, index_type, series, engine):
+    cumloss = pd.read_sql_query(
+        "SELECT lastdate, cumulativeloss AS cumloss FROM index_version "
+        "WHERE index=%s and series=%s order by lastdate",
+        engine,
+        params=(index_type, series),
+        parse_dates=["lastdate"],
+    )
+    cumloss.iat[-1, 0] = pd.Timestamp.max
+    cumloss = (
+        cumloss.set_index("lastdate").sort_index().reindex(df.index, method="bfill")
+    )
+    return df.join(cumloss)
+
+
+def get_corr_data(index_type, series, engine):
+    sql_str = (
+        "SELECT quotedate::date, indexrefspread, indexrefprice, index_duration, "
+        "index_expected_loss, corr_at_detach "
+        "FROM tranche_risk JOIN tranche_quotes "
+        "ON tranche_risk.tranche_id=tranche_quotes.id "
+        "WHERE index=%s and series=%s and tenor='5yr' and detach=%s order by quotedate desc"
+    )
+    df = pd.read_sql_query(
+        sql_str,
+        engine,
+        params=(index_type, series, 3 if index_type == "IG" else 15),
+        index_col=["quotedate"],
+        parse_dates=["quotedate"],
+    )
+    if index_type == "HY":
+        spread_equivalent = []
+        index = CreditIndex(index_type, series, "5yr")
+        for k, v in df.iterrows():
+            index.value_date = k
+            index.ref = v["indexrefprice"]
+            spread_equivalent.append(index.spread)
+        df["indexrefspread"] = spread_equivalent
+    df = df.assign(
+        fisher=lambda x: 0.5 * np.log((1 + x.corr_at_detach) / (1 - x.corr_at_detach))
+    )
+    return df
+
+
+if __name__ == "__main__":
+    index_type = "HY"
+    series = 29
+    serenitas_engine = dbengine("serenitasdb")
+    dispersion = get_dispersion(index_type, series)
+    df = get_corr_data(index_type, series, serenitas_engine)
+    df = df.join(dispersion)
+    if index_type == "HY":
+        df = add_cumloss(df, index_type, series, serenitas_engine)
+
+    if index_type == "HY":
+        formula = "fisher ~ np.log(dispersion) + cumloss + np.log(index_duration)"
+    else:
+        formula = "fisher ~ np.log(dispersion) + np.log(indexrefspread) + np.log(index_duration)"
+    mod = smf.ols(formula=formula, data=df)