{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import itertools\n",
    "import datetime\n",
    "import exploration.dispersion as disp\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.formula.api as smf\n",
    "import serenitas.analytics.tranche_data as tdata\n",
    "\n",
    "from serenitas.analytics.basket_index import MarkitBasketIndex\n",
    "from serenitas.analytics import on_the_run\n",
    "from statsmodels.graphics.regressionplots import plot_fit\n",
    "from scipy.special import logit, expit\n",
    "from serenitas.utils.db import dbengine, dbconn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import PolynomialFeatures, PowerTransformer\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.feature_selection import RFECV, RFE\n",
    "from sklearn.compose import TransformedTargetRegressor\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Section 1----------------------------------------------------\n",
    "#index basis doesn't work with HY (opposite reaction to what I think)\n",
    "#RFE\n",
    "# Columns removed from the regressor frame in run_rfe; comment an entry out to include it as a feature.\n",
    "drop_variable_list = ['tranche_loss_per', 'tranche_id', 'index_price', 'detach', 'corr_at_detach',\n",
    "                      'corr01', 'exp_percentage', 'indexfactor', 'duration', 'index_expected_loss',\n",
    "                      'index_theta', 'delta', 'expected_loss', 'attach_adj', 'detach_adj',\n",
    "                      'cumulativeloss',\n",
    "                      'forward_delta',\n",
    "                      #Comment out to include\n",
    "                      # 'index_duration',\n",
    "                      'thickness',\n",
    "                      'moneyness',\n",
    "                      # 'index_basis',\n",
    "                      # 'att_moneyness',\n",
    "                      # 'det_moneyness',\n",
    "                      'dispersion',\n",
    "                      # 'gini',\n",
    "                      'gamma',\n",
    "                      'theta',\n",
    "                     ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def run_rfe(index_type, random_state=0):\n",
    "    \"\"\"Fit an RFE-selected linear model of tranche loss for one index type.\n",
    "\n",
    "    Tranche data is loaded with ``disp.get_tranche_data``. The model is fitted on\n",
    "    every tranche except the one with the highest attach point, keeping only rows\n",
    "    with a positive ``tranche_loss_per``; the target is ``logit(tranche_loss_per)``\n",
    "    and the regressors are the columns not listed in ``drop_variable_list``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    index_type : str\n",
    "        Index family forwarded to ``disp.get_tranche_data`` (e.g. 'HY').\n",
    "    random_state : int or None, default 0\n",
    "        Seed for the train/test split so reruns give the same scores;\n",
    "        pass None for an unseeded split.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        ``(model, df, X)``: the fitted pipeline, the tranche data with the\n",
    "        prediction and ``mispricing`` columns added, and the regressor frame.\n",
    "    \"\"\"\n",
    "    risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
    "    attach_max = risk.index.get_level_values(\"attach\").max()\n",
    "    bottom_stack = risk[risk.index.get_level_values(\"attach\") != attach_max]\n",
    "    bottom_stack = bottom_stack[bottom_stack.tranche_loss_per > 0].dropna()\n",
    "\n",
    "    #prepare the variables\n",
    "    y = logit(bottom_stack['tranche_loss_per'])\n",
    "    X = bottom_stack.drop(drop_variable_list, axis=1)\n",
    "\n",
    "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,\n",
    "                                                        random_state=random_state)\n",
    "\n",
    "    pipe_rfe = make_pipeline(PowerTransformer(),\n",
    "                             #PolynomialFeatures(degree=2),\n",
    "                             PolynomialFeatures(interaction_only=True),\n",
    "                             RFECV(estimator=LinearRegression(),\n",
    "                                   cv=10,\n",
    "                                   min_features_to_select=1))\n",
    "\n",
    "    # First pass: cross-validated RFE picks the number of features to keep.\n",
    "    pipe_rfe.fit(X_train, y_train)\n",
    "    n_features_to_select = pipe_rfe['rfecv'].n_features_\n",
    "    # Second pass: swap the last step for a plain RFE with that feature count and refit.\n",
    "    pipe_rfe.steps[-1] = ('rfe', RFE(estimator=LinearRegression(), n_features_to_select=n_features_to_select))\n",
    "    model = pipe_rfe.fit(X_train, y_train)\n",
    "\n",
    "    #RandomForest\n",
    "    #params = {'n_estimators': 100,\n",
    "    #          'min_samples_split': 3,\n",
    "    #          'verbose':1,\n",
    "    #          'n_jobs': -1}\n",
    "    #randomforest = RandomForestRegressor(**params)\n",
    "\n",
    "\n",
    "    #gradientboost\n",
    "    #params = {'n_estimators': 500,\n",
    "    #          'max_depth': 10,\n",
    "    #          'min_samples_split': 3,\n",
    "    #          'learning_rate': 0.01,\n",
    "    #          'loss': 'huber',\n",
    "    #          'verbose':1}\n",
    "    #gb = GradientBoostingRegressor(**params).fit(X_train, y_train)\n",
    "\n",
    "    #model = VotingRegressor([('rf', model), ('gb', gb)]).fit(X_train, y_train)\n",
    "    #model = VotingRegressor([('lr', pipe_rfe)]).fit(X, logit(y))\n",
    "\n",
    "    # The model predicts in logit space; expit maps the prediction back.\n",
    "    predicted = pd.DataFrame(expit(model.predict(X)),\n",
    "                             index=X.index,\n",
    "                             columns=['predict_tranche_loss'])\n",
    "    df = pd.merge(risk, predicted, how='left', left_index=True, right_index=True)\n",
    "\n",
    "    df.loc[df.index.get_level_values(\"attach\") != attach_max, \"predict_tranche_loss_per_index\"] = (\n",
    "        df.predict_tranche_loss * df.thickness / df.index_expected_loss\n",
    "    )\n",
    "\n",
    "    def aux(s):\n",
    "        # Set the last entry of each group to one minus the sum of the other entries.\n",
    "        # Work on a copy so the frame being transformed is never mutated in place\n",
    "        # (writing into s.values fails when pandas hands out a read-only array).\n",
    "        temp = s.to_numpy(copy=True)\n",
    "        temp[-1] = 1 - temp[:-1].sum()\n",
    "        return temp\n",
    "\n",
    "    df[\"predict_tranche_loss_per_index\"] = df.groupby([\"index\", \"series\", \"date\"])[\"predict_tranche_loss_per_index\"].transform(aux)\n",
    "    df = df.assign(\n",
    "        mispricing=(df.exp_percentage - df.predict_tranche_loss_per_index)\n",
    "        * df.index_expected_loss\n",
    "        / (df.detach_adj - df.attach_adj)\n",
    "    )\n",
    "\n",
    "    # get_feature_names was renamed get_feature_names_out in scikit-learn 1.0 and\n",
    "    # removed in 1.2; support both so the report works on old and new versions.\n",
    "    poly = pipe_rfe['polynomialfeatures']\n",
    "    if hasattr(poly, 'get_feature_names_out'):\n",
    "        feature_names = poly.get_feature_names_out(X.columns)\n",
    "    else:\n",
    "        feature_names = poly.get_feature_names(X.columns)\n",
    "\n",
    "    print(index_type, \" num features: \", n_features_to_select)\n",
    "    print(index_type, \" Chosen columns: \", np.array(feature_names)[pipe_rfe['rfe'].support_])\n",
    "    print(index_type, \" Training Score: \", model.score(X_train, y_train))\n",
    "    print(index_type, \" Testing Score: \", model.score(X_test, y_test))\n",
    "\n",
    "    return model, df, X\n",
    "\n",
    "gini_model, gini_results, gini_X = {}, {}, {}\n",
    "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n",
    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
    "    gini_model[index_type], gini_results[index_type], gini_X[index_type] = run_rfe(index_type)\n",
    "    gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_results_rfecv.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#examine the effect of any particular variable\n",
    "steps = 100\n",
    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
    "    plots = {}\n",
    "    tranche_attach = []\n",
    "    columns = gini_X[index_type].columns\n",
    "    n_vars = len(columns)\n",
    "\n",
    "    for i, X in gini_X[index_type].groupby('attach'):\n",
    "        tranche_attach.append(X.index[0][5])\n",
    "        # every variable other than the shocked one is held at the value in the group's last row\n",
    "        last_row = X.iloc[-1]\n",
    "        for var in columns:\n",
    "            bins = np.linspace(X[var].min(), X[var].max(), num=steps)\n",
    "            # build the whole grid in one go instead of concatenating one column at a time\n",
    "            testing_df = pd.DataFrame({col: (bins if col == var else np.repeat(last_row[col], steps))\n",
    "                                       for col in columns})\n",
    "            plots[i, var] = pd.Series(expit(gini_model[index_type].predict(testing_df)), index=testing_df[var])\n",
    "\n",
    "    sensitivies = pd.concat(plots, names=['attach', 'shock', 'value'])\n",
    "    sensitivies.to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_sensitivies.csv')\n",
    "\n",
    "    # one row of panels per attach group (the grid assumes three groups), one column per variable\n",
    "    fig, axes = plt.subplots(nrows=3, ncols=n_vars, figsize = (20,10))\n",
    "    for i, p in enumerate(plots):\n",
    "        x_loc = i // n_vars\n",
    "        y_loc = i % n_vars\n",
    "        if x_loc == 0:\n",
    "            axes[x_loc, y_loc].set_title(p[1])\n",
    "        plots[p].plot(ax=axes[x_loc, y_loc], label=i, xlabel=\"\")\n",
    "    for i in [0,1,2]:\n",
    "        fig.axes[i*n_vars].text(-0.2, 0.5, \"tranche attach: \" + str(tranche_attach[i]),\n",
    "                                transform=fig.axes[i*n_vars].transAxes,\n",
    "                                verticalalignment='center',\n",
    "                                rotation=90)\n",
    "    # NOTE(review): this path does not include index_type, so every iteration overwrites\n",
    "    # the previous figure and only the last index type's plot remains on disk.\n",
    "    fig.savefig(\"/home/serenitas/edwin/PythonGraphs/dispersion_model.png\", bbox_inches='tight')\n",
    "\n",
    "    fig_1, axes_1 = plt.subplots(nrows=3, ncols=1, figsize = (15,8))\n",
    "    for i, p in enumerate(plots):\n",
    "        x_loc = i // n_vars\n",
    "        plots[p].plot(ax=axes_1[x_loc], label=p[1], xlabel=\"\", legend=True)\n",
    "    for i in [0,1,2]:\n",
    "        fig_1.axes[i].text(-0.05, 0.5, \"tranche attach: \" + str(tranche_attach[i]),\n",
    "                           transform=fig_1.axes[i].transAxes,\n",
    "                           verticalalignment='center',\n",
    "                           rotation=90)\n",
    "    # NOTE(review): same overwrite issue as above -- the file name is shared by all index types.\n",
    "    fig_1.savefig(\"/home/serenitas/edwin/PythonGraphs/dispersion_model_consolidated.png\", bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Section 2----------------------------------------------------\n",
    "#export the gini coefficients (one column per series) to csv\n",
    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
    "    ginis = gini_results[index_type].xs([0, '5yr', index_type],level=['attach','tenor', 'index']).groupby(['date', 'series']).nth(-1).gini.unstack(level='series')\n",
    "    ginis.to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_gini.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Section 3----------------------------------------------------\n",
    "#Fixed Model\n",
    "#NOTE: this cell rebinds gini_model and gini_results from Section 1\n",
    "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
    "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
    "end = datetime.datetime.today()\n",
    "gini_model, gini_results = {}, {}\n",
    "conn = dbconn(\"serenitasdb\")\n",
    "conn.autocommit = True\n",
    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
    "    risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
    "    risk = risk[risk.index_duration > 1] #filter out the short duration ones\n",
    "    gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)\n",
    "# a bare expression in the middle of a cell is discarded, so print the summary explicitly\n",
    "print(gini_model['HY'].fit().summary())\n",
    "\n",
    "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n",
    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
    "    gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/DispersionModel/' + index_type + '_results.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#plot the residuals\n",
    "fitted = gini_model['HY'].fit()\n",
    "# the model is fitted in logit space; expit maps fitted values back before plotting\n",
    "fitted_prob = expit(fitted.fittedvalues)\n",
    "plt.figure(figsize=(8,5))\n",
    "p=plt.scatter(x=fitted_prob,y=expit(fitted.fittedvalues + fitted.resid) - fitted_prob,edgecolor='k')\n",
    "xmin=min(fitted_prob)\n",
    "xmax = max(fitted_prob)\n",
    "plt.hlines(y=0,xmin=xmin*0.9,xmax=xmax*1.1,color='red',linestyle='--',lw=3)\n",
    "plt.xlabel(\"Fitted values\",fontsize=15)\n",
    "plt.ylabel(\"Residuals\",fontsize=15)\n",
    "plt.title(\"Fitted vs. residuals plot\",fontsize=18)\n",
    "plt.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.1 64-bit",
   "language": "python",
   "name": "python39164bit6ddd573894c04d6a858a9a58880cc9d4"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1-final"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}