2 files changed, 208 insertions, 18 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
index fac383c5..b33f2796 100644
--- a/python/exploration/dispersion.py
+++ b/python/exploration/dispersion.py
@@ -104,9 +104,54 @@ def create_models(conn, df) -> (pd.DataFrame, float):
         return temp
 
     df["predict"] = df.groupby(["index", "series", "date"])["predict"].transform(aux)
+    df = df.assign(
+        mispricing=(df.exp_percentage - df.predict)
+        * df.index_expected_loss
+        / (df.detach_adj - df.attach_adj)
+    )
     return (df, model)
 
 
+def create_models_separate(df, tenor="5yr"):
+    """Fit one OLS model per attach point and price tranches against it.
+
+    df is the output of get_tranche_data; only rows whose fourth index level
+    (tenor) equals `tenor` are fitted.  Returns (model, calc): a dict of
+    fitted models keyed by attach, and a frame with predict_preN, predict_sum,
+    predict_N (predict_preN / predict_sum per date/index/series/tenor) and
+    mispricing (multiplied by 100).
+    """
+    model, calc = {}, {}
+    df = df.assign(
+        tranche_loss_per=(df.exp_percentage * df.index_expected_loss)
+        / (df.detach_adj - df.attach_adj)
+    )
+    df = df.groupby(["date", "index", "series", "tenor", "attach"]).nth(-1)
+    formula = (
+        "logit(tranche_loss_per) ~ "
+        "np.log(moneyness)* logit(gini) + "
+        "np.log(index_expected_loss)* logit(gini) + "
+        "np.log(index_duration) + "
+        "I(np.log(moneyness)**2) + I(np.log(moneyness)**3)"
+    )
+    for attach in df.index.get_level_values("attach").unique():
+        sub = df.loc(axis=0)[:, :, :, tenor, attach]
+        model[attach] = smf.ols(formula, data=sub).fit()
+        # Rescale using the subset's own columns rather than all of df.
+        fitted = expit(model[attach].predict(sub)) * (sub.detach_adj - sub.attach_adj)
+        calc[attach] = sub.assign(predict=fitted / sub.index_expected_loss)
+    calc = pd.concat(calc, sort=False).reset_index(level=0, drop=True)
+    normalization = calc.groupby(["date", "index", "series", "tenor"])["predict"].sum()
+    calc = calc.merge(
+        normalization, left_index=True, right_index=True, suffixes=["_preN", "_sum"]
+    )
+    calc["predict_N"] = calc["predict_preN"] / calc["predict_sum"]
+    price_gap = calc["exp_percentage"] - calc["predict_N"]
+    width = calc["detach_adj"] - calc["attach_adj"]
+    calc["mispricing"] = price_gap * calc["index_expected_loss"] / width * 100
+    return model, calc
+
+
 if __name__ == "__main__":
     index_type = "HY"
     series = 29
diff --git a/python/notebooks/Dispersion.ipynb b/python/notebooks/Dispersion.ipynb
index e8e07f4a..0c3b518f 100644
--- a/python/notebooks/Dispersion.ipynb
+++ b/python/notebooks/Dispersion.ipynb
@@ -19,9 +19,7 @@
     "from analytics import on_the_run\n",
     "from statsmodels.graphics.regressionplots import plot_fit\n",
     "from pygam import LinearGAM, s, f, GAM\n",
-    "from utils.db import dbengine\n",
-    "\n",
-    "serenitas_engine = dbengine('serenitasdb')"
+    "from utils.db import dbengine, dbconn"
    ]
   },
   {
@@ -40,9 +38,14 @@
    "outputs": [],
    "source": [
     "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
-    "start_date = datetime.date(2019,9,27)\n",
-    "end_date = datetime.date(2020,1,30)\n",
-    "index_type = 'HY'"
+    "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
+    "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
+    "end = datetime.datetime.today()\n",
+    "index_type = 'IG'\n",
+    "risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
+    "train_data = risk[start: end]\n",
+    "gini_calc, gini_model = disp.create_models(dbconn(\"serenitasdb\"), train_data)\n",
+    "gini_model.fit().summary()"
    ]
   },
   {
@@ -51,11 +54,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Get Gini factor\n",
-    "date_range = pd.bdate_range(end=value_date, freq='5B',periods=52*4)\n",
-    "risk = disp.get_tranche_data(index_type, serenitas_engine)\n",
-    "risk = risk[risk.index.get_level_values(0).isin(date_range)]\n",
-    "gini_model, gini_calc = disp.create_models(risk, use_gini=True, use_log=True)"
+    "gini_calc.xs(31, level = 'series')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#use trained model to fit rest of the data\n",
+    "f = gini_model.fit()\n",
+    "risk.loc[risk.index.get_level_values(\"attach\") != attach_max, \"predict\"] = expit(f.predict(bottom_stack))"
    ]
   },
   {
@@ -66,7 +76,7 @@
    "source": [
     "#Plot Gini if (use gini=True, use_log=False)\n",
     "to_plot_gini = gini_calc.xs(0, level='attach').groupby(['date', 'series']).nth(-1)\n",
-    "to_plot_gini['dispersion'].unstack().plot()"
+    "to_plot_gini['gini'].unstack().plot()"
    ]
   },
   {
@@ -75,9 +85,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Today's mispricing\n",
+    "#last day: mispricing\n",
     "today = gini_calc.xs([value_date,33], level=['date','series'])\n",
-    "today[['exp_percentage', 'predict_N', 'predict_preN', 'mispricing']]"
+    "today[['exp_percentage', 'predict', 'mispricing']]"
    ]
   },
   {
@@ -88,7 +98,8 @@
    "source": [
     "#plot mispricing of a tranche through time \n",
     "attach = 0\n",
-    "to_plot = gini_calc.xs(attach, level='attach')['mispricing']\n",
+    "series = 33\n",
+    "to_plot = gini_calc.xs([attach, series], level=['attach', 'series'])['mispricing']\n",
     "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()"
    ]
   },
@@ -98,6 +109,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "gini_calc.xs([attach, series], level=['attach', 'series']).to_clipboard()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "#plot mispricing of series through time \n",
     "series = 33\n",
     "to_plot = gini_calc.xs(series, level='series')['mispricing']\n",
@@ -119,6 +139,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "to_csv('/home/serenitas/edwin/Python/temp1.csv')\n",
+    "\n",
     "gini_calc.to_csv('/home/serenitas/edwin/Python/' + index_type+ '_tranche_model.csv')"
    ]
   },
@@ -132,12 +154,36 @@
     "tranche_returns = tdata.get_tranche_quotes(index=index_type)\n",
     "tranche_returns = tdata.tranche_returns(df=tranche_returns)\n",
     "attach = 0\n",
-    "t = tranche_returns['deladj_return'].reset_index(['index', 'tenor'], drop=True).xs(attach, level='attach')\n",
+    "t = tranche_returns['delhedged_return'].reset_index(['index', 'tenor'], drop=True).xs(attach, level='attach')\n",
     "temp={}\n",
     "for i,g in t.groupby('series'):\n",
     "    temp[i] = (g.dropna()+1).cumprod()\n",
     "t = pd.concat(temp).reset_index(0, drop=True)\n",
-    "t.unstack(level='series').plot()"
+    "t.unstack(level='series').plot()\n",
+    "tranche_returns.to_csv('/home/serenitas/edwin/Python/temp3.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "attach = 0\n",
+    "\n",
+    "returns = tranche_returns.xs(['HY', 29, '5yr', attach], level = ['index', 'series', 'tenor','attach'])['delhedged_return']\n",
+    "model = gini_calc.xs(['HY', 29, '5yr', attach], level = ['index', 'series', 'tenor','attach'])['mispricing']\n",
+    "returns = pd.merge(returns, model, left_index=True, right_index=True)\n",
+    "model_verification = smf.ols(\"delhedged_return ~ mispricing \", data=returns).fit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tranche_returns.xs(29, level='series').unstack(level='attach').to_csv('/home/serenitas/edwin/Python/temp1.csv')"
    ]
   },
   {
@@ -257,6 +303,105 @@
    "source": [
     "today, predict_HY33"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deltas = []\n",
+    "for s in portf.swaptions:\n",
+    "    deltas.append(s.delta)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Old Model\n",
+    "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
+    "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
+    "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
+    "end = datetime.datetime.today()\n",
+    "gini_model, gini_results = {}, {}\n",
+    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
+    "    risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
+    "    gini_model[index_type], gini_results[index_type] = disp.create_models_separate(risk)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gini_model['HY'][0].summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gini_results['HY']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.concat(gini_results.values()).to_csv('/home/serenitas/edwin/results.csv', header=True)  # gini_results is a dict of per-index frames"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
+    "    gini_results[index_type].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "to_plot = pd.concat(gini_results.values()).xs(0, level='attach')['mispricing']  # gini_results is a dict of per-index frames"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "to_plot.groupby(['date', 'index','tenor']).nth(-1).plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.concat(gini_results.values()).xs(31, level='series')  # gini_results is a dict of per-index frames"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -275,7 +420,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.1-final"
+   "version": "3.8.1"
   }
  },
  "nbformat": 4,