Diffstat (limited to 'python')
-rw-r--r--  python/exploration/dispersion.py   52
-rw-r--r--  python/notebooks/Dispersion.ipynb  39
2 files changed, 62 insertions, 29 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
index b33f2796..c9f219cf 100644
--- a/python/exploration/dispersion.py
+++ b/python/exploration/dispersion.py
@@ -67,6 +67,13 @@ def get_tranche_data(conn, index_type, tenor="5yr"):
             (x.detach - x.cumulativeloss) / df.indexfactor, 1
         ),
         moneyness=lambda x: (x.detach_adj + x.attach_adj) / 2 / x.index_expected_loss,
+        att_moneyness=lambda x: x.attach_adj / x.index_expected_loss,
+        det_moneyness=lambda x: x.detach_adj / x.index_expected_loss,
+    )
+    df = df.assign(
+        thickness=(df.detach_adj - df.attach_adj),
+        tranche_loss_per=(df.exp_percentage * df.index_expected_loss)
+        / (df.detach_adj - df.attach_adj),
     )
     df = df.set_index(["date", "index", "series", "version", "tenor", "attach"])
     series = tuple(df.index.get_level_values("series").unique())
@@ -112,7 +119,42 @@ def create_models(conn, df) -> (pd.DataFrame, float):
     return (df, model)
 
 
-def create_models_separate(df):
+def create_models_v2(conn, df) -> (pd.DataFrame, float):
+    # Takes the output of get_tranche_data
+    attach_max = df.index.get_level_values("attach").max()
+    bottom_stack = df[df.index.get_level_values("attach") != attach_max]
+    model = smf.ols(
+        "logit(tranche_loss_per) ~ "
+        "np.log(index_duration) + "
+        "np.log(moneyness) * gini + "
+        "np.log(index_expected_loss)* gini + "
+        "expit(att_moneyness) +"
+        "expit(det_moneyness)",
+        data=bottom_stack,
+    )
+    f = model.fit()
+    df.loc[
+        df.index.get_level_values("attach") != attach_max, "predict_tranche_loss"
+    ] = expit(f.predict(bottom_stack))
+    df.loc[df.index.get_level_values("attach") != attach_max, "predict"] = (
+        df.predict_tranche_loss * df.thickness / df.index_expected_loss
+    )
+
+    def aux(s):
+        temp = s.values
+        temp[-1] = 1 - temp[:-1].sum()
+        return temp
+
+    df["predict"] = df.groupby(["index", "series", "date"])["predict"].transform(aux)
+    df = df.assign(
+        mispricing=(df.exp_percentage - df.predict)
+        * df.index_expected_loss
+        / (df.detach_adj - df.attach_adj)
+    )
+    return (df, model)
+
+
+def create_separate_models(df):
     # Takes the output of get_tranche_data
     model, calc = {}, {}
     df = df.assign(
@@ -124,9 +166,10 @@ def create_models_separate(df):
         calc[attach] = df.loc(axis=0)[:, :, :, "5yr", attach]
         model[attach] = smf.ols(
             "logit(tranche_loss_per) ~ "
-            "np.log(moneyness)* logit(gini) + "
-            "np.log(index_expected_loss)* logit(gini) + "
+            "I(np.log(index_expected_loss)**2) + "
             "np.log(index_duration) + "
+            "np.log(moneyness) * logit(gini) + "
+            "np.log(index_expected_loss)* logit(gini) + "
             "I(np.log(moneyness)**2) + I(np.log(moneyness)**3)",
             data=calc[attach],
         ).fit()
@@ -147,9 +190,8 @@
         (calc["exp_percentage"] - calc["predict_N"])
         * calc["index_expected_loss"]
         / (calc["detach_adj"] - calc["attach_adj"])
-        * 100
     )
-    return model, calc
+    return (calc, model)
 
 
 if __name__ == "__main__":
diff --git a/python/notebooks/Dispersion.ipynb b/python/notebooks/Dispersion.ipynb
index 0c3b518f..1b722f50 100644
--- a/python/notebooks/Dispersion.ipynb
+++ b/python/notebooks/Dispersion.ipynb
@@ -42,9 +42,11 @@
     "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
     "end = datetime.datetime.today()\n",
     "index_type = 'IG'\n",
-    "risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
+    "serenitasconn = dbconn(\"serenitasdb\")\n",
+    "serenitasconn.autocommit = True\n",
+    "risk = disp.get_tranche_data(serenitasconn, index_type)\n",
     "train_data = risk[start: end]\n",
-    "gini_calc, gini_model = disp.create_models(dbconn(\"serenitasdb\"), train_data)\n",
+    "gini_calc, gini_model = disp.create_models(serenitasconn, train_data)\n",
     "gini_model.fit().summary()"
    ]
   },
@@ -321,15 +323,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Old Model\n",
     "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
     "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
     "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
     "end = datetime.datetime.today()\n",
     "gini_model, gini_results = {}, {}\n",
+    "conn = dbconn(\"serenitasdb\")\n",
+    "conn.autocommit = True\n",
     "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
     "    risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
-    "    gini_model[index_type], gini_results[index_type] = disp.create_models_separate(risk)"
+    "    #gini_results[index_type], gini_model[index_type] = disp.create_separate_models(risk)\n",
+    "    gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)"
    ]
   },
  {
@@ -338,7 +342,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "gini_model['HY'][0].summary()"
+    "#gini_model['HY'][0].summary()\n",
+    "gini_model['HY'].fit().summary()"
    ]
   },
  {
@@ -356,17 +361,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "gini_results.to_csv('/home/serenitas/edwin/results.csv', header=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n",
     "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
-    "    gini_results[index_type].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')"
+    "    gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')"
    ]
   },
  {
@@ -374,27 +371,21 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "to_plot = gini_results.xs(0, level='attach')['mispricing']"
-   ]
+   "source": []
   },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
-   "source": [
-    "to_plot.groupby(['date', 'index','tenor']).nth(-1).plot()"
-   ]
+   "source": []
   },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
-   "source": [
-    "gini_results.xs(31, level='series')"
-   ]
+   "source": []
   },
  {
   "cell_type": "code",
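For context, the core of the new create_models_v2 is an OLS fit on the logit of each junior tranche's loss share, with predictions mapped back through expit. Below is a minimal, self-contained sketch of that pattern; the toy DataFrame, its values, and every name outside the formula string are invented for illustration and are not part of the repository.

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from scipy.special import logit, expit  # referenced by name inside the formula

# Hypothetical stand-in for the output of get_tranche_data (random placeholder values).
rng = np.random.default_rng(0)
toy = pd.DataFrame({
    "tranche_loss_per": rng.uniform(0.05, 0.95, 200),
    "moneyness": rng.uniform(0.5, 2.0, 200),
    "gini": rng.uniform(0.3, 0.7, 200),
    "index_expected_loss": rng.uniform(0.01, 0.1, 200),
    "index_duration": rng.uniform(3.0, 5.0, 200),
    "att_moneyness": rng.uniform(0.0, 1.5, 200),
    "det_moneyness": rng.uniform(0.5, 3.0, 200),
})

# Same formula structure as in the diff: log/logit transforms plus gini interactions.
model = smf.ols(
    "logit(tranche_loss_per) ~ "
    "np.log(index_duration) + "
    "np.log(moneyness) * gini + "
    "np.log(index_expected_loss) * gini + "
    "expit(att_moneyness) + expit(det_moneyness)",
    data=toy,
)
fit = model.fit()
predicted_share = expit(fit.predict(toy))  # back-transform fitted logits to (0, 1)

Note that the formula interface resolves logit, expit, and np from the calling namespace, so those imports must be present wherever the formula is built.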

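The aux helper in create_models_v2 leaves the most senior tranche out of the regression and backs its predicted share out as one minus the sum of the junior tranches, so each capital structure's shares add to one. A hypothetical, standalone illustration of that groupby/transform step (the group keys and numbers below are made up):

import numpy as np
import pandas as pd

def fill_senior_with_remainder(s: pd.Series) -> np.ndarray:
    # Replace the last (most senior) entry of each group with the residual share.
    vals = s.values.copy()
    vals[-1] = 1 - vals[:-1].sum()
    return vals

toy = pd.DataFrame({
    "index": ["IG"] * 4,
    "series": [33] * 4,          # made-up series number
    "date": ["2020-06-01"] * 4,
    "predict": [0.55, 0.25, 0.12, np.nan],  # senior tranche left unmodelled
})
toy["predict"] = toy.groupby(["index", "series", "date"])["predict"].transform(
    fill_senior_with_remainder
)
print(toy["predict"].tolist())  # last entry becomes roughly 0.08

This assumes the rows within each (index, series, date) group are ordered from the most junior to the most senior tranche, as they are when the frame is sorted by attachment point.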