diff options
Diffstat (limited to 'python/notebooks/Dispersion.ipynb')
| -rw-r--r-- | python/notebooks/Dispersion.ipynb | 202 |
1 files changed, 202 insertions, 0 deletions
# %% [markdown]
# # Dispersion
#
# Models `exp_percentage` of the attach == 0 tranches (labelled "equity" in
# the original notes) from `gini_spread`, `duration` and `moneyness`:
# first with a log-log OLS regression, then with a GAM.

# %%
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from pygam import LinearGAM, s, f, GAM
from statsmodels.graphics.regressionplots import plot_fit

import exploration.dispersion as disp
from analytics import on_the_run
from analytics.basket_index import MarkitBasketIndex
from utils.db import dbengine

serenitas_engine = dbengine('serenitasdb')

# %%
# Script-form equivalent of the `%matplotlib inline` cell magic.
get_ipython().run_line_magic('matplotlib', 'inline')

# %% [markdown]
# ## Settings

# %%
# `pd.datetime` was deprecated in pandas 1.0 and removed in 2.0; it was only an
# alias for `datetime.datetime`, so this is the same computation.
value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()
index_type = 'HY'

# Series inspected in the "today" cells. The original cells hard-coded 33 (and,
# in one comparison, 31 -- which looked like a stale leftover).
# TODO(review): presumably this could be derived from `analytics.on_the_run`;
# confirm its signature before switching.
focus_series = 33

# Oldest series included in the GAM training sample.
min_gam_series = 23

# Explanatory columns, in the order the GAM terms s(0), s(1), s(2) refer to them.
features = ['gini_spread', 'duration', 'moneyness']

# %% [markdown]
# ## Load tranche data and compute gini_spread / std_spread

# %%
risk_all = disp.get_tranche_data(index_type, serenitas_engine)

# 52 * 4 sample dates ending at value_date, spaced 5 business days apart.
date_range = pd.bdate_range(end=value_date, freq='5B', periods=52 * 4)

# .copy() so that adding columns below writes to an independent frame rather
# than to a slice of risk_all (avoids SettingWithCopyWarning).
risk = risk_all[risk_all.index.isin(date_range)].copy()

# One call per row; `row.name` is the row's index label.
# NOTE(review): presumably each call returns a (gini, stdev) pair -- inferred
# from the two target column names; confirm in exploration.dispersion.
gini_std = risk.apply(
    lambda row: disp.get_gini_spreadstdev(row['index'], row.series, [row.tenor], row.name),
    axis=1,
)
risk[['gini_spread', 'std_spread']] = pd.DataFrame(
    gini_std.values.tolist(),
    columns=['gini_spread', 'std_spread'],
    index=gini_std.index,
)

# %% [markdown]
# ## gini_spread history, 5yr tenor, one line per series

# %%
# `risk` is already restricted to date_range, so only the attach filter is needed.
gini_calc = risk[risk.attach == 0]
gini_5yr = gini_calc[gini_calc.tenor == '5yr']

# Last row per (date, series). `groupby(...).nth(-1)` used to return this with
# the group keys as index, but since pandas 2.0 `nth` is a filter that keeps
# the original index, which breaks the `.unstack()` below. An explicit
# last-row aggregation gives the same values on both old and new pandas.
to_plot_gini = (
    gini_5yr
    .groupby(['date', 'series'])['gini_spread']
    .agg(lambda col: col.iloc[-1])
)
to_plot_gini.unstack().plot(title='5yr gini_spread by series');

# %% [markdown]
# ## OLS: model expected loss percentage instead of correlation -- equity

# %%
gini_model = smf.ols(
    "np.log(exp_percentage) ~ np.log(gini_spread) + np.log(duration) + np.log(moneyness)",
    data=gini_calc,
).fit()
gini_model.summary()

# %%
plot_fit(gini_model, 'np.log(moneyness)')

# %%
# The regression target is log(exp_percentage), so exponentiate the prediction
# to get back to exp_percentage units. `.assign` builds a new frame instead of
# mutating `gini_calc`, so re-running this cell is idempotent.
gini_pred = gini_calc.assign(predict=np.exp(gini_model.predict(gini_calc)))

# %%
ols_today = gini_pred[
    (gini_pred['index'] == index_type) & (gini_pred['series'] == focus_series)
].loc[value_date]
ols_today

# %% [markdown]
# ## GAM alternative (5yr point only)

# %%
equity = gini_calc[(gini_calc.tenor == '5yr') & (gini_calc.series >= min_gam_series)]
X = np.array(equity[features])
y = np.array(equity['exp_percentage'])

# One 5-spline smooth term per feature column.
gam_model = GAM(
    s(0, n_splines=5)
    + s(1, n_splines=5)
    + s(2, n_splines=5)
)

# Grid-search the smoothing penalty (lambda) over the same candidates for every term.
lam = np.logspace(-3, 5, 5, base=3)
lams = [lam] * len(features)
gam_model.gridsearch(X, y, lam=lams)

gam_model.summary()

# %%
# Partial dependence of each term with its 95% interval (red dashed).
fig, axs = plt.subplots(1, len(features))

for i, ax in enumerate(axs):
    XX = gam_model.generate_X_grid(term=i)
    # With `width` set, partial_dependence returns (pdep, confidence intervals),
    # so a single call supplies both curves.
    pdep, confi = gam_model.partial_dependence(term=i, X=XX, width=.95)
    ax.plot(XX[:, i], pdep)
    ax.plot(XX[:, i], confi, c='r', ls='--')
    if i == 0:
        # NOTE(review): fixed y-range kept from the original cell; confirm it
        # still suits the first panel.
        ax.set_ylim(-30, 30)
    ax.set_title(features[i])

# %%
# In-sample fit. `y` is exp_percentage, so label the axes accordingly (the
# original labels still said "correlation").
fig, ax = plt.subplots()
ax.scatter(y, gam_model.predict(X))
ax.set_xlabel('actual expected loss percentage')
ax.set_ylabel('predicted expected loss percentage');

# %%
# Most recent date in the GAM sample. The list-wrapped label makes .loc return
# a DataFrame even when only one row matches, so the series filter below works.
latest = equity.loc[[equity.index.max()]]
latest_focus = latest[latest.series == focus_series]

# Compare actual and predicted values for the *same* series.
predicted_focus = gam_model.predict(np.array(latest_focus[features]))
latest_focus[['exp_percentage']], predicted_focus
