diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/exploration/dispersion.py | 21 | ||||
| -rw-r--r-- | python/notebooks/Dispersion.ipynb | 321 |
2 files changed, 58 insertions, 284 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py index c9f219cf..c7bc33d9 100644 --- a/python/exploration/dispersion.py +++ b/python/exploration/dispersion.py @@ -119,18 +119,25 @@ def create_models(conn, df) -> (pd.DataFrame, float): return (df, model) -def create_models_v2(conn, df) -> (pd.DataFrame, float): +def create_models_v2(conn, df, weights=None) -> (pd.DataFrame, float): # Takes the output of get_tranche_data attach_max = df.index.get_level_values("attach").max() bottom_stack = df[df.index.get_level_values("attach") != attach_max] - model = smf.ols( + if weights is None: + weights = np.ones(len(bottom_stack)) + else: + weights.name = "resids" + bottom_stack = bottom_stack.merge(weights, left_index=True, right_index=True) + weights = np.array(bottom_stack.resids) + model = smf.wls( "logit(tranche_loss_per) ~ " - "np.log(index_duration) + " - "np.log(moneyness) * gini + " - "np.log(index_expected_loss)* gini + " - "expit(att_moneyness) +" - "expit(det_moneyness)", + "np.log(index_duration) * np.log(gini)+ " + "np.log(moneyness) * np.log(gini) + " + "I(np.log(gini)**2) +" + "expit(att_moneyness) + I(expit(att_moneyness)**2) +" + "expit(det_moneyness) + I(expit(det_moneyness)**2)", data=bottom_stack, + weights=weights, ) f = model.fit() df.loc[ diff --git a/python/notebooks/Dispersion.ipynb b/python/notebooks/Dispersion.ipynb index 1b722f50..0d7e4cd3 100644 --- a/python/notebooks/Dispersion.ipynb +++ b/python/notebooks/Dispersion.ipynb @@ -18,6 +18,7 @@ "from analytics.basket_index import MarkitBasketIndex\n", "from analytics import on_the_run\n", "from statsmodels.graphics.regressionplots import plot_fit\n", + "from scipy.special import logit, expit\n", "from pygam import LinearGAM, s, f, GAM\n", "from utils.db import dbengine, dbconn" ] @@ -41,68 +42,17 @@ "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n", "#end = (start + pd.offsets.BDay(1) * 365).date()\n", "end = datetime.datetime.today()\n", - "index_type = 'IG'\n", - "serenitasconn = dbconn(\"serenitasdb\")\n", - "serenitasconn.autocommit = True\n", - "risk = disp.get_tranche_data(serenitasconn, index_type)\n", - "train_data = risk[start: end]\n", - "gini_calc, gini_model = disp.create_models(serenitasconn, train_data)\n", - "gini_model.fit().summary()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gini_calc.xs(31, level = 'series')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#use trained model to fit rest of the data\n", - "f = gini_model.fit()\n", - "risk.loc[risk.index.get_level_values(\"attach\") != attach_max, \"predict\"] = expit(f.predict(bottom_stack))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Plot Gini if (use gini=True, use_log=False)\n", - "to_plot_gini = gini_calc.xs(0, level='attach').groupby(['date', 'series']).nth(-1)\n", - "to_plot_gini['gini'].unstack().plot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#last day: mispricing\n", - "today = gini_calc.xs([value_date,33], level=['date','series'])\n", - "today[['exp_percentage', 'predict', 'mispricing']]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#plot mispricing of a tranche through time \n", - "attach = 0\n", - "series = 33\n", - "to_plot = gini_calc.xs([attach, series], level=['attach', 'series'])['mispricing']\n", - "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()" + "gini_model, gini_results = {}, {}\n", + "conn = dbconn(\"serenitasdb\")\n", + "conn.autocommit = True\n", + "for index_type in ['HY', 'IG', 'EU', 'XO']:\n", + " risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n", + " risk = risk[risk.index_duration > .5] #filter out the short duration ones\n", + " gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)\n", + " fitted = gini_model[index_type].fit()\n", + " w = 1/(expit(fitted.fittedvalues + fitted.resid) -expit(fitted.fittedvalues))**2\n", + " gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk, w)\n", + "gini_model['HY'].fit().summary()" ] }, { @@ -111,7 +61,9 @@ "metadata": {}, "outputs": [], "source": [ - "gini_calc.xs([attach, series], level=['attach', 'series']).to_clipboard()" + "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n", + "for index_type in ['HY', 'IG', 'EU', 'XO']:\n", + " gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')" ] }, { @@ -120,10 +72,12 @@ "metadata": {}, "outputs": [], "source": [ - "#plot mispricing of series through time \n", - "series = 33\n", - "to_plot = gini_calc.xs(series, level='series')['mispricing']\n", - "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()" + "#Run a particular gini scenario\n", + "scenario = gini_results['HY'].loc(axis=0)[value_date,'HY',33,:,'5yr',0]\n", + "scenario['gini'].iloc[0] = .7\n", + "scenario_disp = expit(gini_model['HY'].fit().predict(scenario))\n", + "mispricing = scenario['tranche_loss_per'] - scenario_disp\n", + "mispricing" ] }, { @@ -132,7 +86,18 @@ "metadata": {}, "outputs": [], "source": [ - "plot_fit(gini_model[0], 'np.log(index_duration)')" + "#plot the residuals\n", + "fitted = gini_model['HY'].fit()\n", + "plt.figure(figsize=(8,5))\n", + "p=plt.scatter(x=expit(fitted.fittedvalues),y=expit(fitted.fittedvalues + fitted.resid) -expit(fitted.fittedvalues),edgecolor='k')\n", + "xmin=min(expit(fitted.fittedvalues))\n", + "xmax = max(expit(fitted.fittedvalues))\n", + "plt.hlines(y=0,xmin=xmin*0.9,xmax=xmax*1.1,color='red',linestyle='--',lw=3)\n", + "plt.xlabel(\"Fitted values\",fontsize=15)\n", + "plt.ylabel(\"Residuals\",fontsize=15)\n", + "plt.title(\"Fitted vs. residuals plot\",fontsize=18)\n", + "plt.grid(True)\n", + "plt.show()" ] }, { @@ -141,9 +106,17 @@ "metadata": {}, "outputs": [], "source": [ - "to_csv('/home/serenitas/edwin/Python/temp1.csv')\n", - "\n", - "gini_calc.to_csv('/home/serenitas/edwin/Python/' + index_type+ '_tranche_model.csv')" + "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n", + "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n", + "#end = (start + pd.offsets.BDay(1) * 365).date()\n", + "end = datetime.datetime.today()\n", + "index_type = 'IG'\n", + "serenitasconn = dbconn(\"serenitasdb\")\n", + "serenitasconn.autocommit = True\n", + "risk = disp.get_tranche_data(serenitasconn, index_type)\n", + "train_data = risk[start: end]\n", + "gini_calc, gini_model = disp.create_models(serenitasconn, train_data)\n", + "gini_model.fit().summary()" ] }, { @@ -187,212 +160,6 @@ "source": [ "tranche_returns.xs(29, level='series').unstack(level='attach').to_csv('/home/serenitas/edwin/Python/temp1.csv')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#compare models\n", - "a = [True, False]\n", - "for years in [1,2,3,4,5,6]:\n", - " date_range = pd.bdate_range(end=value_date, freq='5B',periods=52*years)\n", - " risk = disp.get_tranche_data(index_type, serenitas_engine)\n", - " risk = risk[risk.index.get_level_values(0).isin(date_range)]\n", - " for x, y in list(itertools.product(a,a)):\n", - " gini_model, gini_calc = disp.create_models(risk, use_gini=x, use_log=y)\n", - " for i, m in gini_model.items():\n", - " print (years, x, y, i, m.rsquared)\n", - " today = gini_calc.xs([value_date,33], level=['date','series'])\n", - " print (today[['exp_percentage', 'predict_N', 'predict_preN', 'mispricing']])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Run a particular gini scenario\n", - "scenario = gini_calc.loc(axis=0)[value_date,33,'HY','5yr',0]\n", - "scenario['dispersion'] = .6\n", - "scenario_disp = np.exp(gini_model[0].predict(scenario))\n", - "mispricing = (scenario['exp_percentage'] - scenario_disp) * \\\n", - " scenario['index_expected_loss'] / \\\n", - " (scenario['detach_adj'] - scenario['attach_adj']) / \\\n", - " scenario['indexfactor'] * 10000\n", - "mispricing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gini_calc.loc(axis=0)[:,33,'HY','5yr',0]['mispricing']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Let's use a GAM model instead?\n", - "#only use the 5yr point for modeling\n", - "equity = gini_calc.loc(axis=0)[:,:,[25,27,29,31,33],'5yr',0]\n", - "X = np.array(equity[['gini_spread', 'duration', 'moneyness']])\n", - "y = np.array(equity['exp_percentage'])\n", - "\n", - "#Fit for Lamda\n", - "gam_model = GAM(s(0, n_splines=5) +\n", - " s(1, n_splines=5) +\n", - " s(2, n_splines=5))\n", - "lam = np.logspace(-3, 5, 5, base=3)\n", - "lams = [lam] * 3\n", - "gam_model.gridsearch(X, y, lam=lams)\n", - "\n", - "gam_model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## plotting\n", - "fig, axs = plt.subplots(1,3);\n", - "\n", - "titles = ['gini_spread', 'duration', 'moneyness']\n", - "for i, ax in enumerate(axs):\n", - " XX = gam_model.generate_X_grid(term=i)\n", - " ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX))\n", - " ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX, width=.95)[1], c='r', ls='--')\n", - " if i == 0:\n", - " ax.set_ylim(-30,30)\n", - " ax.set_title(titles[i]);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.scatter(y, gam_model.predict(X))\n", - "plt.xlabel('actual correlation')\n", - "plt.ylabel('predicted correlation')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "today = gini_calc.loc(axis=0)[value_date,'HY',33,'5yr',0]\n", - "predict_HY33 = gam_model.predict(np.array(today[['gini_spread', 'duration', 'moneyness']]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "today, predict_HY33" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "deltas = []\n", - "for s in portf.swaptions:\n", - " deltas.append(s.delta)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n", - "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n", - "#end = (start + pd.offsets.BDay(1) * 365).date()\n", - "end = datetime.datetime.today()\n", - "gini_model, gini_results = {}, {}\n", - "conn = dbconn(\"serenitasdb\")\n", - "conn.autocommit = True\n", - "for index_type in ['HY', 'IG', 'EU', 'XO']:\n", - " risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n", - " #gini_results[index_type], gini_model[index_type] = disp.create_separate_models(risk)\n", - " gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#gini_model['HY'][0].summary()\n", - "gini_model['HY'].fit().summary()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gini_results['HY']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n", - "for index_type in ['HY', 'IG', 'EU', 'XO']:\n", - " gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { |
