aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/notebooks/dispersion_tranche_model.ipynb198
1 files changed, 27 insertions, 171 deletions
diff --git a/python/notebooks/dispersion_tranche_model.ipynb b/python/notebooks/dispersion_tranche_model.ipynb
index 46eb348c..56255a42 100644
--- a/python/notebooks/dispersion_tranche_model.ipynb
+++ b/python/notebooks/dispersion_tranche_model.ipynb
@@ -18,7 +18,7 @@
"import serenitas.analytics.tranche_data as tdata\n",
"\n",
"from serenitas.analytics.basket_index import MarkitBasketIndex\n",
- "from serenitas.analytics import on_the_run\n",
+ "from serenitas.analytics.index_data import on_the_run\n",
"from statsmodels.graphics.regressionplots import plot_fit\n",
"from scipy.special import logit, expit\n",
"from serenitas.utils.db import dbengine, dbconn\n",
@@ -52,117 +52,18 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Section 1----------------------------------------------------\n",
- "#index basis doesn't work with HY (opposite reaction to what I think)\n",
- "#RFE\n",
- "drop_variable_list = ['tranche_loss_per', 'tranche_id', 'index_price', 'detach', 'corr_at_detach', \n",
- " 'corr01', 'exp_percentage', 'indexfactor', 'duration', 'index_expected_loss',\n",
- " 'index_theta', 'delta', 'expected_loss', 'attach_adj', 'detach_adj',\n",
- " 'cumulativeloss', \n",
- " 'forward_delta', \n",
- " #Comment out to include\n",
- " # 'index_duration',\n",
- " 'thickness',\n",
- " 'moneyness',\n",
- " # 'index_basis',\n",
- " # 'att_moneyness', \n",
- " # 'det_moneyness',\n",
- " 'dispersion',\n",
- " # 'gini', \n",
- " 'gamma',\n",
- " 'theta',\n",
- " 'index_theta'\n",
- " ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "def run_rfe(index_type):\n",
- " risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
- " attach_max = risk.index.get_level_values(\"attach\").max()\n",
- " bottom_stack = risk[risk.index.get_level_values(\"attach\") != attach_max]\n",
- " bottom_stack = bottom_stack[bottom_stack.tranche_loss_per > 0].dropna()\n",
- "\n",
- " #prepare the variables\n",
- " y = logit(bottom_stack['tranche_loss_per'])\n",
- " X = bottom_stack.drop(drop_variable_list, axis=1)\n",
- " \n",
- " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
- " \n",
- " pipe_rfe = make_pipeline (PowerTransformer(),\n",
- " #PolynomialFeatures(degree=2),\n",
- " PolynomialFeatures(interaction_only=True),\n",
- " RFECV(estimator=LinearRegression(), \n",
- " cv=10,\n",
- " min_features_to_select=1))\n",
- " \n",
- " pipe_rfe.fit(X_train, y_train)\n",
- " n_features_to_select = pipe_rfe['rfecv'].n_features_\n",
- " pipe_rfe.steps[-1]= ('rfe', RFE(estimator=LinearRegression(), n_features_to_select = n_features_to_select))\n",
- " model = pipe_rfe.fit(X_train, y_train)\n",
- " \n",
- " #RandomForest\n",
- " #params = {'n_estimators': 100,\n",
- " # 'min_samples_split': 3,\n",
- " # 'verbose':1,\n",
- " # 'n_jobs': -1}\n",
- " #randomforest = RandomForestRegressor(**params)\n",
- " \n",
- " \n",
- " #gradientboost\n",
- " #params = {'n_estimators': 500,\n",
- " # 'max_depth': 10,\n",
- " # 'min_samples_split': 3,\n",
- " # 'learning_rate': 0.01,\n",
- " # 'loss': 'huber',\n",
- " # 'verbose':1}\n",
- " #gb = GradientBoostingRegressor(**params).fit(X_train, y_train)\n",
- " \n",
- " #model = VotingRegressor([('rf', model), ('gb', gb)]).fit(X_train, y_train)\n",
- " #model = VotingRegressor([('lr', pipe_rfe)]).fit(X, logit(y))\n",
- "\n",
- " df = pd.merge(risk, \n",
- " pd.DataFrame(expit(model.predict(X)), \n",
- " index=X.index, \n",
- " columns=['predict_tranche_loss']),\n",
- " how='left', left_index=True, right_index=True)\n",
- "\n",
- " df.loc[df.index.get_level_values(\"attach\") != attach_max, \"predict_tranche_loss_per_index\"] = (\n",
- " df.predict_tranche_loss * df.thickness / df.index_expected_loss\n",
- " )\n",
- "\n",
- " def aux(s):\n",
- " temp = s.values\n",
- " temp[-1] = 1 - temp[:-1].sum()\n",
- " return temp\n",
- "\n",
- " df[\"predict_tranche_loss_per_index\"] = df.groupby([\"index\", \"series\", \"date\"])[\"predict_tranche_loss_per_index\"].transform(aux)\n",
- " df = df.assign(\n",
- " mispricing=(df.exp_percentage - df.predict_tranche_loss_per_index)\n",
- " * df.index_expected_loss\n",
- " / (df.detach_adj - df.attach_adj)\n",
- " )\n",
- " rfe_result = pipe_rfe\n",
- " print(index_type, \" num features: \", n_features_to_select)\n",
- " print(index_type, \" Chosen columns: \", np.array(rfe_result['polynomialfeatures'].get_feature_names_out(X.columns))[rfe_result['rfe'].support_])\n",
- " print(index_type, \" Training Score: \", model.score(X_train, y_train))\n",
- " print(index_type, \" Testing Score: \", model.score(X_test, y_test))\n",
- " \n",
- " return model, df, X\n",
- "\n",
- "gini_model, gini_results, gini_X = {}, {}, {}\n",
+ "#Run RFE model\n",
+ "gini_model, gini_results = {}, {}\n",
"fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n",
"for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
- " gini_model[index_type], gini_results[index_type], gini_X[index_type] = run_rfe(index_type)\n",
+ " risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
+ " risk = risk[risk.index_duration > 1] #filter out the short duration ones\n",
+ " gini_results[index_type], gini_model[index_type] = disp.create_rfe_models(risk)\n",
" gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_results_rfecv.csv')"
]
},
@@ -179,20 +80,27 @@
"for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
" plots = {}\n",
" tranche_attach = []\n",
- "\n",
- " for i, X in gini_X[index_type].groupby('attach'):\n",
+ " \n",
+ " res = gini_results[index_type]\n",
+ " mod = gini_model[index_type]\n",
+ " \n",
+ " Xs = res[mod.feature_names_in_]\n",
+ " \n",
+ " for i, X in Xs.groupby('attach'):\n",
" tranche_attach.append(X.index[0][5])\n",
" for var in X.columns:\n",
" bins = np.linspace(X[var].min(), X[var].max(),num=steps)\n",
" testing_df = pd.DataFrame(bins, columns=[var])\n",
" for var_1 in X.drop(var, axis=1).columns:\n",
" testing_df = pd.concat([testing_df, pd.Series(np.repeat(X.iloc[-1][var_1], steps),name=var_1)], axis=1)\n",
- " plots[i, var] = pd.Series(expit(gini_model[index_type].predict(testing_df[X.columns])), index=testing_df[var])\n",
+ " plots[i, var] = pd.Series(expit(mod.predict(testing_df[X.columns])), index=testing_df[var])\n",
"\n",
+ " #breakpoint()\n",
+ " \n",
" sensitivies = pd.concat(plots, names=['attach', 'shock', 'value'])\n",
" sensitivies.to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_sensitivies.csv')\n",
"\n",
- " fig, axes = plt.subplots(nrows=3, ncols=len(X.columns), figsize = (20,10))\n",
+ " fig, axes = plt.subplots(nrows=4, ncols=len(X.columns), figsize = (20,10))\n",
" for i, p in enumerate(plots):\n",
" x_loc = int(i/len(X.columns))\n",
" y_loc = i % len(X.columns)\n",
@@ -206,7 +114,7 @@
" rotation=90)\n",
" fig.savefig(\"/home/serenitas/edwin/PythonGraphs/dispersion_model.png\", bbox_inches='tight')\n",
"\n",
- " fig_1, axes_1 = plt.subplots(nrows=3, ncols=1, figsize = (15,8))\n",
+ " fig_1, axes_1 = plt.subplots(nrows=4, ncols=1, figsize = (15,8))\n",
" for i, p in enumerate(plots):\n",
" x_loc = int(i/len(X.columns))\n",
" plots[p].plot(ax=axes_1[x_loc], label=p[1], xlabel=\"\", legend=True)\n",
@@ -234,7 +142,9 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#Section 3----------------------------------------------------\n",
@@ -259,7 +169,9 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"#plot the residuals\n",
@@ -289,69 +201,13 @@
"data = risk[['gini', 'index_duration', 'index_expected_loss']]\n",
"ols_model = smf.ols(\"gini ~ np.log(index_duration) + np.log(index_expected_loss)\", data=data).fit()\n"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3.9.1 64-bit",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "python39164bit6ddd573894c04d6a858a9a58880cc9d4"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -363,7 +219,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.2"
+ "version": "3.10.8"
}
},
"nbformat": 4,