diff options
Diffstat (limited to 'python/notebooks/dispersion_tranche_model.ipynb')
| -rw-r--r-- | python/notebooks/dispersion_tranche_model.ipynb | 80 |
1 files changed, 67 insertions, 13 deletions
diff --git a/python/notebooks/dispersion_tranche_model.ipynb b/python/notebooks/dispersion_tranche_model.ipynb index cd59fb3f..46eb348c 100644 --- a/python/notebooks/dispersion_tranche_model.ipynb +++ b/python/notebooks/dispersion_tranche_model.ipynb @@ -3,7 +3,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import pandas as pd\n", @@ -19,7 +21,8 @@ "from serenitas.analytics import on_the_run\n", "from statsmodels.graphics.regressionplots import plot_fit\n", "from scipy.special import logit, expit\n", - "from serenitas.utils.db import dbengine, dbconn" + "from serenitas.utils.db import dbengine, dbconn\n", + "import statsmodels.formula.api as smf" ] }, { @@ -150,7 +153,7 @@ " )\n", " rfe_result = pipe_rfe\n", " print(index_type, \" num features: \", n_features_to_select)\n", - " print(index_type, \" Chosen columns: \", np.array(rfe_result['polynomialfeatures'].get_feature_names(X.columns))[rfe_result['rfe'].support_])\n", + " print(index_type, \" Chosen columns: \", np.array(rfe_result['polynomialfeatures'].get_feature_names_out(X.columns))[rfe_result['rfe'].support_])\n", " print(index_type, \" Training Score: \", model.score(X_train, y_train))\n", " print(index_type, \" Testing Score: \", model.score(X_test, y_test))\n", " \n", @@ -166,7 +169,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#examine the effect of any paricular variable\n", @@ -222,7 +227,7 @@ "#Section 2----------------------------------------------------\n", "#plot the gini coefficients\n", "for index_type in ['HY', 'IG', 'EU', 'XO']:\n", - " ginis = gini_results[index_type].xs([0, '5yr', index_type],level=['attach','tenor', 'index']).groupby(['date', 'series']).nth(-1).gini.unstack(level='series')\n", + " ginis = gini_results[index_type].xs((0, '5yr', index_type),level=['attach','tenor', 'index']).groupby(['date', 'series']).nth(-1).gini.unstack(level='series')\n", " ginis.to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_gini.csv')" ] }, @@ -256,13 +261,6 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], "source": [ "#plot the residuals\n", "fitted = gini_model['HY'].fit()\n", @@ -283,6 +281,62 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [ + "#Section 4----------------------------------------------------\n", + "#Model gini? let's try it out with duration and spread\n", + "index_type = 'HY'\n", + "risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type).dropna(subset=['gini', 'index_duration', 'index_expected_loss'])\n", + "data = risk[['gini', 'index_duration', 'index_expected_loss']]\n", + "ols_model = smf.ols(\"gini ~ np.log(index_duration) + np.log(index_expected_loss)\", data=data).fit()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] }, { @@ -309,7 +363,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1-final" + "version": "3.10.2" } }, "nbformat": 4, |
