about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/exploration/dispersion.py 21
-rw-r--r-- python/notebooks/Dispersion.ipynb 321
2 files changed, 58 insertions, 284 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
index c9f219cf..c7bc33d9 100644
--- a/python/exploration/dispersion.py
+++ b/python/exploration/dispersion.py
@@ -119,18 +119,25 @@ def create_models(conn, df) -> (pd.DataFrame, float):
return (df, model)
-def create_models_v2(conn, df) -> (pd.DataFrame, float):
+def create_models_v2(conn, df, weights=None) -> (pd.DataFrame, float):
# Takes the output of get_tranche_data
attach_max = df.index.get_level_values("attach").max()
bottom_stack = df[df.index.get_level_values("attach") != attach_max]
- model = smf.ols(
+ if weights is None:
+ weights = np.ones(len(bottom_stack))
+ else:
+ weights.name = "resids"
+ bottom_stack = bottom_stack.merge(weights, left_index=True, right_index=True)
+ weights = np.array(bottom_stack.resids)
+ model = smf.wls(
"logit(tranche_loss_per) ~ "
- "np.log(index_duration) + "
- "np.log(moneyness) * gini + "
- "np.log(index_expected_loss)* gini + "
- "expit(att_moneyness) +"
- "expit(det_moneyness)",
+ "np.log(index_duration) * np.log(gini)+ "
+ "np.log(moneyness) * np.log(gini) + "
+ "I(np.log(gini)**2) +"
+ "expit(att_moneyness) + I(expit(att_moneyness)**2) +"
+ "expit(det_moneyness) + I(expit(det_moneyness)**2)",
data=bottom_stack,
+ weights=weights,
)
f = model.fit()
df.loc[
diff --git a/python/notebooks/Dispersion.ipynb b/python/notebooks/Dispersion.ipynb
index 1b722f50..0d7e4cd3 100644
--- a/python/notebooks/Dispersion.ipynb
+++ b/python/notebooks/Dispersion.ipynb
@@ -18,6 +18,7 @@
"from analytics.basket_index import MarkitBasketIndex\n",
"from analytics import on_the_run\n",
"from statsmodels.graphics.regressionplots import plot_fit\n",
+ "from scipy.special import logit, expit\n",
"from pygam import LinearGAM, s, f, GAM\n",
"from utils.db import dbengine, dbconn"
]
@@ -41,68 +42,17 @@
"start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
"#end = (start + pd.offsets.BDay(1) * 365).date()\n",
"end = datetime.datetime.today()\n",
- "index_type = 'IG'\n",
- "serenitasconn = dbconn(\"serenitasdb\")\n",
- "serenitasconn.autocommit = True\n",
- "risk = disp.get_tranche_data(serenitasconn, index_type)\n",
- "train_data = risk[start: end]\n",
- "gini_calc, gini_model = disp.create_models(serenitasconn, train_data)\n",
- "gini_model.fit().summary()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "gini_calc.xs(31, level = 'series')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#use trained model to fit rest of the data\n",
- "f = gini_model.fit()\n",
- "risk.loc[risk.index.get_level_values(\"attach\") != attach_max, \"predict\"] = expit(f.predict(bottom_stack))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Plot Gini if (use gini=True, use_log=False)\n",
- "to_plot_gini = gini_calc.xs(0, level='attach').groupby(['date', 'series']).nth(-1)\n",
- "to_plot_gini['gini'].unstack().plot()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#last day: mispricing\n",
- "today = gini_calc.xs([value_date,33], level=['date','series'])\n",
- "today[['exp_percentage', 'predict', 'mispricing']]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#plot mispricing of a tranche through time \n",
- "attach = 0\n",
- "series = 33\n",
- "to_plot = gini_calc.xs([attach, series], level=['attach', 'series'])['mispricing']\n",
- "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()"
+ "gini_model, gini_results = {}, {}\n",
+ "conn = dbconn(\"serenitasdb\")\n",
+ "conn.autocommit = True\n",
+ "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
+ " risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
+ " risk = risk[risk.index_duration > .5] #filter out the short duration ones\n",
+ " gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)\n",
+ " fitted = gini_model[index_type].fit()\n",
+ " w = 1/(expit(fitted.fittedvalues + fitted.resid) -expit(fitted.fittedvalues))**2\n",
+ " gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk, w)\n",
+ "gini_model['HY'].fit().summary()"
]
},
{
@@ -111,7 +61,9 @@
"metadata": {},
"outputs": [],
"source": [
- "gini_calc.xs([attach, series], level=['attach', 'series']).to_clipboard()"
+ "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n",
+ "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
+ " gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')"
]
},
{
@@ -120,10 +72,12 @@
"metadata": {},
"outputs": [],
"source": [
- "#plot mispricing of series through time \n",
- "series = 33\n",
- "to_plot = gini_calc.xs(series, level='series')['mispricing']\n",
- "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()"
+ "#Run a particular gini scenario\n",
+ "scenario = gini_results['HY'].loc(axis=0)[value_date,'HY',33,:,'5yr',0]\n",
+ "scenario['gini'].iloc[0] = .7\n",
+ "scenario_disp = expit(gini_model['HY'].fit().predict(scenario))\n",
+ "mispricing = scenario['tranche_loss_per'] - scenario_disp\n",
+ "mispricing"
]
},
{
@@ -132,7 +86,18 @@
"metadata": {},
"outputs": [],
"source": [
- "plot_fit(gini_model[0], 'np.log(index_duration)')"
+ "#plot the residuals\n",
+ "fitted = gini_model['HY'].fit()\n",
+ "plt.figure(figsize=(8,5))\n",
+ "p=plt.scatter(x=expit(fitted.fittedvalues),y=expit(fitted.fittedvalues + fitted.resid) -expit(fitted.fittedvalues),edgecolor='k')\n",
+ "xmin=min(expit(fitted.fittedvalues))\n",
+ "xmax = max(expit(fitted.fittedvalues))\n",
+ "plt.hlines(y=0,xmin=xmin*0.9,xmax=xmax*1.1,color='red',linestyle='--',lw=3)\n",
+ "plt.xlabel(\"Fitted values\",fontsize=15)\n",
+ "plt.ylabel(\"Residuals\",fontsize=15)\n",
+ "plt.title(\"Fitted vs. residuals plot\",fontsize=18)\n",
+ "plt.grid(True)\n",
+ "plt.show()"
]
},
{
@@ -141,9 +106,17 @@
"metadata": {},
"outputs": [],
"source": [
- "to_csv('/home/serenitas/edwin/Python/temp1.csv')\n",
- "\n",
- "gini_calc.to_csv('/home/serenitas/edwin/Python/' + index_type+ '_tranche_model.csv')"
+ "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
+ "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
+ "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
+ "end = datetime.datetime.today()\n",
+ "index_type = 'IG'\n",
+ "serenitasconn = dbconn(\"serenitasdb\")\n",
+ "serenitasconn.autocommit = True\n",
+ "risk = disp.get_tranche_data(serenitasconn, index_type)\n",
+ "train_data = risk[start: end]\n",
+ "gini_calc, gini_model = disp.create_models(serenitasconn, train_data)\n",
+ "gini_model.fit().summary()"
]
},
{
@@ -187,212 +160,6 @@
"source": [
"tranche_returns.xs(29, level='series').unstack(level='attach').to_csv('/home/serenitas/edwin/Python/temp1.csv')"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#compare models\n",
- "a = [True, False]\n",
- "for years in [1,2,3,4,5,6]:\n",
- " date_range = pd.bdate_range(end=value_date, freq='5B',periods=52*years)\n",
- " risk = disp.get_tranche_data(index_type, serenitas_engine)\n",
- " risk = risk[risk.index.get_level_values(0).isin(date_range)]\n",
- " for x, y in list(itertools.product(a,a)):\n",
- " gini_model, gini_calc = disp.create_models(risk, use_gini=x, use_log=y)\n",
- " for i, m in gini_model.items():\n",
- " print (years, x, y, i, m.rsquared)\n",
- " today = gini_calc.xs([value_date,33], level=['date','series'])\n",
- " print (today[['exp_percentage', 'predict_N', 'predict_preN', 'mispricing']])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Run a particular gini scenario\n",
- "scenario = gini_calc.loc(axis=0)[value_date,33,'HY','5yr',0]\n",
- "scenario['dispersion'] = .6\n",
- "scenario_disp = np.exp(gini_model[0].predict(scenario))\n",
- "mispricing = (scenario['exp_percentage'] - scenario_disp) * \\\n",
- " scenario['index_expected_loss'] / \\\n",
- " (scenario['detach_adj'] - scenario['attach_adj']) / \\\n",
- " scenario['indexfactor'] * 10000\n",
- "mispricing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "gini_calc.loc(axis=0)[:,33,'HY','5yr',0]['mispricing']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Let's use a GAM model instead?\n",
- "#only use the 5yr point for modeling\n",
- "equity = gini_calc.loc(axis=0)[:,:,[25,27,29,31,33],'5yr',0]\n",
- "X = np.array(equity[['gini_spread', 'duration', 'moneyness']])\n",
- "y = np.array(equity['exp_percentage'])\n",
- "\n",
- "#Fit for Lamda\n",
- "gam_model = GAM(s(0, n_splines=5) +\n",
- " s(1, n_splines=5) +\n",
- " s(2, n_splines=5))\n",
- "lam = np.logspace(-3, 5, 5, base=3)\n",
- "lams = [lam] * 3\n",
- "gam_model.gridsearch(X, y, lam=lams)\n",
- "\n",
- "gam_model.summary()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "## plotting\n",
- "fig, axs = plt.subplots(1,3);\n",
- "\n",
- "titles = ['gini_spread', 'duration', 'moneyness']\n",
- "for i, ax in enumerate(axs):\n",
- " XX = gam_model.generate_X_grid(term=i)\n",
- " ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX))\n",
- " ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX, width=.95)[1], c='r', ls='--')\n",
- " if i == 0:\n",
- " ax.set_ylim(-30,30)\n",
- " ax.set_title(titles[i]);"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plt.scatter(y, gam_model.predict(X))\n",
- "plt.xlabel('actual correlation')\n",
- "plt.ylabel('predicted correlation')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "today = gini_calc.loc(axis=0)[value_date,'HY',33,'5yr',0]\n",
- "predict_HY33 = gam_model.predict(np.array(today[['gini_spread', 'duration', 'moneyness']]))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "today, predict_HY33"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "deltas = []\n",
- "for s in portf.swaptions:\n",
- " deltas.append(s.delta)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
- "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
- "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
- "end = datetime.datetime.today()\n",
- "gini_model, gini_results = {}, {}\n",
- "conn = dbconn(\"serenitasdb\")\n",
- "conn.autocommit = True\n",
- "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
- " risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
- " #gini_results[index_type], gini_model[index_type] = disp.create_separate_models(risk)\n",
- " gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#gini_model['HY'][0].summary()\n",
- "gini_model['HY'].fit().summary()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "gini_results['HY']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n",
- "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
- " gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {