1 files changed, 74 insertions, 53 deletions
diff --git a/python/notebooks/dispersion_tranche_model.ipynb b/python/notebooks/dispersion_tranche_model.ipynb
index a2bb5a67..cd59fb3f 100644
--- a/python/notebooks/dispersion_tranche_model.ipynb
+++ b/python/notebooks/dispersion_tranche_model.ipynb
@@ -53,21 +53,22 @@
    "outputs": [],
    "source": [
     "#Section 1----------------------------------------------------\n",
+    "#index_basis doesn't work for HY (the reaction is the opposite of what I expect)\n",
     "#RFE\n",
     "drop_variable_list = ['tranche_loss_per', 'tranche_id', 'index_price', 'detach', 'corr_at_detach', \n",
     "                      'corr01', 'exp_percentage', 'indexfactor', 'duration', 'index_expected_loss',\n",
     "                      'index_theta', 'delta', 'expected_loss', 'attach_adj', 'detach_adj',\n",
-    "                      'theta', 'cumulativeloss',  \n",
+    "                      'cumulativeloss',  \n",
     "                      'forward_delta', \n",
     "                      #Comment out to include\n",
-    "                      'index_duration',\n",
-    "    #                 'thickness',\n",
-    "    #                 'moneyness',\n",
+    "    #                  'index_duration',\n",
+    "                     'thickness',\n",
+    "                     'moneyness',\n",
     "    #                 'index_basis',\n",
-    "                      'att_moneyness', \n",
-    "                      'det_moneyness',\n",
+    "    #                  'att_moneyness', \n",
+    "    #                 'det_moneyness',\n",
     "                      'dispersion',\n",
-    "     #                'gini', \n",
+    "    #                 'gini', \n",
     "                      'gamma',\n",
     "                      'theta',\n",
     "                      'index_theta'\n",
@@ -77,7 +78,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "def run_rfe(index_type):\n",
@@ -93,6 +96,7 @@
     "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
     "    \n",
     "    pipe_rfe = make_pipeline (PowerTransformer(),\n",
+    "                              #PolynomialFeatures(degree=2),\n",
     "                              PolynomialFeatures(interaction_only=True),\n",
     "                              RFECV(estimator=LinearRegression(), \n",
     "                                    cv=10,\n",
@@ -118,9 +122,9 @@
     "    #          'learning_rate': 0.01,\n",
     "    #          'loss': 'huber',\n",
     "    #          'verbose':1}\n",
-    "    #gb = GradientBoostingRegressor(**params).fit(X_p, y)\n",
+    "    #gb = GradientBoostingRegressor(**params).fit(X_train, y_train)\n",
     "    \n",
-    "    #model = VotingRegressor([('rf', model), ('lr', randomforest)]).fit(X_train, y_train)\n",
+    "    #model = VotingRegressor([('rf', model), ('gb', gb)]).fit(X_train, y_train)\n",
     "    #model = VotingRegressor([('lr', pipe_rfe)]).fit(X, logit(y))\n",
     "\n",
     "    df = pd.merge(risk, \n",
@@ -167,43 +171,46 @@
    "source": [
     "#examine the effect of any paricular variable\n",
     "steps = 100\n",
-    "index_type = 'HY'\n",
-    "plots = {}\n",
-    "tranche_attach = []\n",
+    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
+    "    plots = {}\n",
+    "    tranche_attach = []\n",
+    "\n",
+    "    for i, X in gini_X[index_type].groupby('attach'):\n",
+    "        tranche_attach.append(X.index[0][5])\n",
+    "        base = X.iloc[-1]  # other features are held at the group's last row\n",
+    "        for var in X.columns:\n",
+    "            bins = np.linspace(X[var].min(), X[var].max(),num=steps)\n",
+    "            testing_df = pd.DataFrame({col: np.repeat(base[col], steps) for col in X.columns})  # built once instead of one pd.concat per column\n",
+    "            testing_df[var] = bins  # sweep only var across its observed range\n",
+    "            plots[i, var] = pd.Series(expit(gini_model[index_type].predict(testing_df[X.columns])), index=testing_df[var])\n",
     "\n",
-    "for i, X in gini_X[index_type].groupby('attach'):\n",
-    "    tranche_attach.append(X.index[0][5])\n",
-    "    for var in X.columns:\n",
-    "        bins = np.linspace(X[var].min(), X[var].max(),num=steps)\n",
-    "        testing_df = pd.DataFrame(bins, columns=[var])\n",
-    "        for var_1 in X.drop(var, axis=1).columns:\n",
-    "            testing_df = pd.concat([testing_df, pd.Series(np.repeat(X.iloc[-1][var_1], steps),name=var_1)], axis=1)\n",
-    "        plots[i, var] = pd.Series(expit(gini_model[index_type].predict(testing_df[X.columns])), index=testing_df[var])\n",
+    "    sensitivies = pd.concat(plots, names=['attach', 'shock', 'value'])\n",
+    "    sensitivies.to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_sensitivies.csv')\n",
     "\n",
-    "fig, axes = plt.subplots(nrows=3, ncols=len(X.columns), figsize = (20,10))\n",
-    "for i, p in enumerate(plots):\n",
-    "    x_loc = int(i/len(X.columns))\n",
-    "    y_loc = i % len(X.columns)\n",
-    "    if x_loc == 0:\n",
-    "        axes[x_loc, y_loc].set_title(p[1]) \n",
-    "    plots[p].plot(ax=axes[x_loc, y_loc], label=i, xlabel=\"\")\n",
-    "for i in [0,1,2]:\n",
-    "    fig.axes[i*len(X.columns)].text(-0.2, 0.5, \"tranche attach: \"  + str(tranche_attach[i]),\n",
-    "                                    transform=fig.axes[i*len(X.columns)].transAxes,\n",
-    "                                    verticalalignment='center',\n",
-    "                                    rotation=90)\n",
-    "fig.savefig(\"/home/serenitas/edwin/PythonGraphs/dispersion_model.png\", bbox_inches='tight')\n",
+    "    fig, axes = plt.subplots(nrows=len(tranche_attach), ncols=len(X.columns), figsize = (20,10), squeeze=False)  # one row per attach group; squeeze=False keeps axes 2-D\n",
+    "    for i, p in enumerate(plots):\n",
+    "        x_loc = int(i/len(X.columns))\n",
+    "        y_loc = i % len(X.columns)\n",
+    "        if x_loc == 0:\n",
+    "            axes[x_loc, y_loc].set_title(p[1]) \n",
+    "        plots[p].plot(ax=axes[x_loc, y_loc], label=i, xlabel=\"\")\n",
+    "    for i in range(len(tranche_attach)):\n",
+    "        fig.axes[i*len(X.columns)].text(-0.2, 0.5, \"tranche attach: \"  + str(tranche_attach[i]),\n",
+    "                                        transform=fig.axes[i*len(X.columns)].transAxes,\n",
+    "                                        verticalalignment='center',\n",
+    "                                        rotation=90)\n",
+    "    fig.savefig(\"/home/serenitas/edwin/PythonGraphs/\" + index_type + \"_dispersion_model.png\", bbox_inches='tight')  # per-index name; a fixed name was overwritten on every loop pass\n",
     "\n",
-    "fig_1, axes_1 = plt.subplots(nrows=3, ncols=1, figsize = (15,8))\n",
-    "for i, p in enumerate(plots):\n",
-    "    x_loc = int(i/len(X.columns))\n",
-    "    plots[p].plot(ax=axes_1[x_loc], label=p[1], xlabel=\"\", legend=True)\n",
-    "for i in [0,1,2]:\n",
-    "    fig_1.axes[i].text(-0.05, 0.5, \"tranche attach: \"  + str(tranche_attach[i]),\n",
-    "                                    transform=fig_1.axes[i].transAxes,\n",
-    "                                    verticalalignment='center',\n",
-    "                                    rotation=90)\n",
-    "fig_1.savefig(\"/home/serenitas/edwin/PythonGraphs/dispersion_model_consolidated.png\", bbox_inches='tight')"
+    "    fig_1, axes_1 = plt.subplots(nrows=len(tranche_attach), ncols=1, figsize = (15,8), squeeze=False)  # one row per attach group; squeeze=False keeps axes_1 2-D\n",
+    "    for i, p in enumerate(plots):\n",
+    "        x_loc = int(i/len(X.columns))\n",
+    "        plots[p].plot(ax=axes_1[x_loc, 0], label=p[1], xlabel=\"\", legend=True)\n",
+    "    for i in range(len(tranche_attach)):\n",
+    "        fig_1.axes[i].text(-0.05, 0.5, \"tranche attach: \"  + str(tranche_attach[i]),\n",
+    "                                        transform=fig_1.axes[i].transAxes,\n",
+    "                                        verticalalignment='center',\n",
+    "                                        rotation=90)\n",
+    "    fig_1.savefig(\"/home/serenitas/edwin/PythonGraphs/\" + index_type + \"_dispersion_model_consolidated.png\", bbox_inches='tight')  # per-index name; a fixed name was overwritten on every loop pass"
    ]
   },
   {
@@ -213,6 +220,19 @@
    "outputs": [],
    "source": [
     "#Section 2----------------------------------------------------\n",
+    "#export the gini coefficients to CSV (one file per index type)\n",
+    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
+    "    ginis = gini_results[index_type].xs((0, '5yr', index_type),level=['attach','tenor', 'index']).groupby(['date', 'series']).nth(-1).gini.unstack(level='series')  # tuple key: newer pandas rejects a list key in xs\n",
+    "    ginis.to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_gini.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Section 3----------------------------------------------------\n",
     "#Fixed Model\n",
     "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
     "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
@@ -236,6 +256,13 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "#plot the residuals\n",
     "fitted = gini_model['HY'].fit()\n",
@@ -256,13 +283,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "#Section 3----------------------------------------------------\n",
-    "#plot the gini coefficients\n",
-    "index_type='EU'\n",
-    "ginis = gini_results[index_type].xs([0, '5yr', index_type],level=['attach','tenor', 'index']).groupby(['date', 'series']).nth(-1).gini.unstack(level='series')\n",
-    "ginis.sort_index(ascending=False).to_clipboard()"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",
@@ -274,9 +295,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.9.1 64-bit",
    "language": "python",
-   "name": "python3"
+   "name": "python39164bit6ddd573894c04d6a858a9a58880cc9d4"
   },
   "language_info": {
    "codemirror_mode": {