1 files changed, 202 insertions, 0 deletions
diff --git a/python/notebooks/Dispersion.ipynb b/python/notebooks/Dispersion.ipynb
new file mode 100644
index 00000000..53fe6993
--- /dev/null
+++ b/python/notebooks/Dispersion.ipynb
@@ -0,0 +1,202 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import datetime\n",
+    "import exploration.dispersion as disp\n",
+    "import matplotlib.pyplot as plt\n",
+    "import statsmodels.formula.api as smf\n",
+    "\n",
+    "from analytics.basket_index import MarkitBasketIndex\n",
+    "from analytics import on_the_run\n",
+    "from statsmodels.graphics.regressionplots import plot_fit\n",
+    "from pygam import LinearGAM, s, f, GAM\n",
+    "from utils.db import dbengine\n",
+    "\n",
+    "serenitas_engine = dbengine('serenitasdb')  # handle returned by dbengine for 'serenitasdb'; passed to disp.get_tranche_data below"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "value_date = (pd.Timestamp.today() - pd.offsets.BDay(1)).date()  # previous business day; pd.datetime no longer exists in pandas 2.x\n",
+    "index_type = 'HY'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Gini and spread dispersion on a 5-business-day grid ending at value_date\n",
+    "risk = disp.get_tranche_data(index_type, serenitas_engine)\n",
+    "date_range = pd.bdate_range(end=value_date, freq='5B', periods=52*4)\n",
+    "risk = risk[risk.index.isin(date_range)].copy()  # copy: columns are assigned below, avoid writing into a slice\n",
+    "dispersion = risk.apply(lambda x: disp.get_gini_spreadstdev(x['index'], x.series, [x.tenor], x.name), axis=1)\n",
+    "risk[['gini_spread', 'std_spread']] = pd.DataFrame(dispersion.values.tolist(), columns=['gini_spread', 'std_spread'], index=dispersion.index)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gini_calc = risk.loc[risk.index.isin(date_range) & risk.attach.eq(0)]  # attach == 0 rows on the sampling dates\n",
+    "to_plot_gini = gini_calc.loc[gini_calc.tenor.eq('5yr')].groupby(['date', 'series']).nth(-1)\n",
+    "to_plot_gini.gini_spread.unstack().plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##### model expected loss percentage instead of correlation -- equity (attach == 0)\n",
+    "gini_calc = risk[risk.attach == 0].copy()  # independent copy: a later cell adds a column, which must not hit a slice of risk\n",
+    "gini_model = smf.ols(\"np.log(exp_percentage) ~ np.log(gini_spread) + np.log(duration) + np.log(moneyness)\", data=gini_calc).fit()\n",
+    "gini_model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fit_fig = plot_fit(gini_model, 'np.log(moneyness)')  # bind the returned Figure so the cell output does not draw it a second time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gini_calc['predict'] = np.exp(gini_model.predict(gini_calc))  # np.exp undoes the log applied to exp_percentage in the OLS formula"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "today = gini_calc.loc[gini_calc['index'].eq(index_type) & gini_calc['series'].eq(33)].loc[value_date]  # series 33 is hard-coded\n",
+    "today"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Alternative: fit a GAM on exp_percentage\n",
+    "#restricted to the 5yr tenor and series 23 onwards\n",
+    "equity = gini_calc[(gini_calc.series >= 23) & (gini_calc.tenor == '5yr')]\n",
+    "X = equity[['gini_spread', 'duration', 'moneyness']].to_numpy()\n",
+    "y = equity['exp_percentage'].to_numpy()\n",
+    "\n",
+    "#Grid-search lambda (the lam values) for each of the three spline terms\n",
+    "lam = np.logspace(-3, 5, 5, base=3)\n",
+    "lams = [lam] * 3\n",
+    "gam_model = GAM(s(0, n_splines=5) +\n",
+    "                  s(1, n_splines=5) +\n",
+    "                  s(2, n_splines=5))\n",
+    "gam_model.gridsearch(X, y, lam=lams)\n",
+    "\n",
+    "gam_model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## plotting: partial dependence of each term together with its width=.95 interval\n",
+    "fig, axs = plt.subplots(1, 3)\n",
+    "titles = ['gini_spread', 'duration', 'moneyness']\n",
+    "for i, ax in enumerate(axs):\n",
+    "    XX = gam_model.generate_X_grid(term=i)\n",
+    "    pdep, confi = gam_model.partial_dependence(term=i, X=XX, width=.95)  # one call yields both curve and interval\n",
+    "    ax.plot(XX[:, i], pdep)\n",
+    "    ax.plot(XX[:, i], confi, c='r', ls='--')\n",
+    "    if i == 0:\n",
+    "        ax.set_ylim(-30, 30)\n",
+    "    ax.set_title(titles[i]);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.scatter(y, gam_model.predict(X))  # y holds exp_percentage, so the axes are labelled as expected loss, not correlation\n",
+    "plt.xlabel('actual expected loss percentage')\n",
+    "plt.ylabel('predicted expected loss percentage');"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "today = equity.loc[max(equity.index)]  # rows carrying the largest index value present in equity\n",
+    "predict_HY33 = gam_model.predict(np.array(today[today.series==33][['gini_spread', 'duration', 'moneyness']]))\n",
+    "today[today.series==33][['exp_percentage']], predict_HY33  # actual vs predicted for the same series (was 31 vs 33)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}