1 files changed, 195 insertions, 46 deletions
diff --git a/python/notebooks/Single Names Monitoring.ipynb b/python/notebooks/Single Names Monitoring.ipynb
index 1f5643e4..8b734db9 100644
--- a/python/notebooks/Single Names Monitoring.ipynb
+++ b/python/notebooks/Single Names Monitoring.ipynb
@@ -8,37 +8,16 @@
    "source": [
     "import pandas as pd\n",
     "import numpy as np\n",
+    "import datetime\n",
+    "\n",
     "from analytics.basket_index import MarkitBasketIndex\n",
+    "from analytics import on_the_run\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "from utils.db import dbengine\n",
-    "from ipywidgets import widgets\n",
-    "engine = dbengine('serenitasdb')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "w = widgets.Dropdown(\n",
-    "    options=['IG', 'HY', 'EU', 'XO'],\n",
-    "    value='IG',\n",
-    "    description='Index:',\n",
-    "    disabled=False,\n",
-    ")\n",
-    "w"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "w_1 = widgets.IntSlider(value=32, min=22, max=32, description = 'Series')\n",
-    "w_1"
+    "serenitas_engine = dbengine('serenitasdb')\n",
+    "\n",
+    "%matplotlib inline"
    ]
   },
   {
@@ -47,9 +26,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "value_date = (pd.datetime.today() - pd.offsets.BDay(2)).date()\n",
-    "index_type = w.value\n",
-    "series = w_1.value"
+    "value_date = (pd.Timestamp.today() - pd.offsets.BDay(1)).date()  # previous business day; the pd.datetime alias is gone in pandas 2\n",
+    "index_type = 'HY'\n",
+    "series = 32"
    ]
   },
   {
@@ -59,8 +38,20 @@
    "outputs": [],
    "source": [
     "sql_string = \"select * from index_members(%s, %s)\"\n",
-    "df = pd.read_sql_query(sql_string, engine, params=(index_type + str(series), value_date), index_col=['markit_ticker'])\n",
-    "df1 = pd.read_sql_query(sql_string, engine, params=(index_type + str(series-2), value_date), index_col=['markit_ticker'])"
+    "# Members of index_type+series and of index_type+(series - 2), both queried with value_date\n",
+    "df = pd.read_sql_query(sql_string, serenitas_engine, index_col=['markit_ticker'], params=(f'{index_type}{series}', value_date))\n",
+    "df1 = pd.read_sql_query(sql_string, serenitas_engine, index_col=['markit_ticker'], params=(f'{index_type}{series - 2}', value_date))\n",
+    "\n",
+    "# 5yr default probability (1 - survival probability) per ticker, keyed by series\n",
+    "default_prob = {}\n",
+    "for s in (series, series - 2):\n",
+    "    index = MarkitBasketIndex(index_type, s, ['5yr'])\n",
+    "    surv_prob, tickers = index.survival_matrix()\n",
+    "    default_prob[s] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n",
+    "default_prob = pd.concat(default_prob, names=['series', 'markit_ticker']).rename('default_prob')\n",
+    "\n",
+    "df = df.merge(default_prob.loc[series], left_index=True, right_index=True)\n",
+    "df1 = df1.merge(default_prob.loc[series - 2], left_index=True, right_index=True)"
    ]
   },
   {
@@ -89,7 +80,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "date_range = pd.bdate_range(value_date - 52 * pd.offsets.Week(), value_date, freq='5B')\n",
+    "date_range = pd.bdate_range(value_date - 26 * pd.offsets.Week(), value_date, freq='5B')  # half a year back; integer multiple (a float multiplier is rejected by newer pandas)\n",
     "index = MarkitBasketIndex(index_type, series, ['5yr'])\n",
     "default_prob = {}\n",
     "for d in date_range:\n",
@@ -118,24 +109,182 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "jupyter": {
-     "source_hidden": true
-    }
-   },
+   "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# Single-name default probabilities for the selected index, valued as of today\n",
+    "tenors = ['3yr', '5yr', '7yr', '10yr']\n",
+    "indices = MarkitBasketIndex(index_type, series, tenors)\n",
+    "indices.value_date = datetime.date.today()\n",
+    "today_surv_prob, tickers = indices.survival_matrix()\n",
+    "# default probability = 1 - survival probability; rows are labelled by ticker, columns by tenor\n",
+    "today_default_prob = pd.DataFrame(data=1 - today_surv_prob, index=tickers, columns=tenors)"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "jupyter": {
-     "source_hidden": true
-    }
-   },
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Dispersion: std_dev of default_prob/average default_prob, computed per date across the on-the-run index names\n",
+    "date_range = pd.bdate_range(value_date - 52 * 4 * pd.offsets.Week(), value_date, freq='5B')\n",
+    "default_prob = {}\n",
+    "ontr = None\n",
+    "for d in date_range:\n",
+    "    current = on_the_run(index_type, d)\n",
+    "    if current != ontr:  # first date, or the on-the-run series changed: rebuild the basket\n",
+    "        ontr = current\n",
+    "        index = MarkitBasketIndex(index_type, ontr, ['5yr'])\n",
+    "    try:\n",
+    "        index.value_date = d\n",
+    "        surv_prob, tickers = index.survival_matrix()\n",
+    "        default_prob[d] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n",
+    "    except Exception:  # skip dates that fail to price, but let KeyboardInterrupt stop the loop\n",
+    "        continue\n",
+    "default_prob = pd.concat(default_prob)\n",
+    "dispersion = default_prob.unstack(level=0)\n",
+    "dispersion = dispersion.std() / dispersion.mean()\n",
+    "dispersion.plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def gini(array):\n",
+    "    \"\"\"Return the Gini coefficient of a pandas Series or array-like; the input is never modified.\"\"\"\n",
+    "    # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif\n",
+    "    # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm\n",
+    "    values = np.array(getattr(array, 'values', array), dtype=float)  # private float copy of the data\n",
+    "    if values.size == 0:\n",
+    "        return np.nan  # undefined for an empty sample\n",
+    "    if values.min() < 0:\n",
+    "        values -= values.min()  # values cannot be negative\n",
+    "    values = np.sort(values + 0.0000001)  # values cannot be 0 and must be sorted\n",
+    "    n = values.shape[0]  # number of elements\n",
+    "    return np.sum((2 * np.arange(1, n + 1) - n - 1) * values) / (n * np.sum(values))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "#Instead of std dev of spread get Gini Factor default prob, joined to the tranche risk numbers\n",
+    "sql_string = \"select * from index_version where index = %s\"\n",
+    "idx_ver = pd.read_sql_query(sql_string, serenitas_engine, params=[index_type,], parse_dates=['lastdate'])\n",
+    "# rows with a null lastdate get a far-future date so they still have a merge key below\n",
+    "idx_ver['date'] = pd.to_datetime([d.strftime('%Y-%m-%d') if not pd.isnull(d) else datetime.date(2050,1,1) for d in idx_ver['lastdate']])\n",
+    "sql_string = \"select * from risk_numbers where index = %s\"\n",
+    "risk = pd.read_sql_query(sql_string, serenitas_engine, parse_dates={'date': {'utc':True}}, params=[index_type])\n",
+    "risk.date = risk.date.dt.normalize().dt.tz_convert(None)\n",
+    "risk = risk.groupby(['date','index','series','tenor','attach']).mean().reset_index()\n",
+    "idx_ver = idx_ver.sort_values(by=['date'])\n",
+    "# direction='forward': each risk row takes the first index version of its series dated on or after the risk date\n",
+    "risk = pd.merge_asof(risk, idx_ver[['date','series','cumulativeloss','indexfactor']], left_on=['date'], right_on=['date'], by='series', direction='forward')\n",
+    "risk = risk.set_index('date')\n",
+    "risk['moneyness'] = (risk['detach'] - risk['cumulativeloss']) / risk['indexfactor'] / risk['index_expected_loss']\n",
+    "\n",
+    "single_day_risk = {}\n",
+    "date_range = pd.bdate_range(value_date - 52 * 5 * pd.offsets.Week(), value_date, freq='5B')\n",
+    "for d in date_range:\n",
+    "    if d not in risk.index:  # no risk numbers for this date\n",
+    "        continue\n",
+    "    day_risk = risk.loc[[d]]  # list indexer: always a DataFrame, even when the date has a single row\n",
+    "    default_prob = {}\n",
+    "    for s in day_risk.series.unique():\n",
+    "        tenors = list(day_risk[day_risk.series == s]['tenor'].sort_values().unique())\n",
+    "        indices = MarkitBasketIndex(index_type, s, tenors)\n",
+    "        try:\n",
+    "            indices.value_date = d\n",
+    "            surv_prob, tickers = indices.survival_matrix()\n",
+    "            default_prob[s] = pd.DataFrame(1 - surv_prob, index=tickers, columns=tenors)\n",
+    "        except Exception:  # skip series that fail to price, but let KeyboardInterrupt stop the loop\n",
+    "            continue\n",
+    "    if default_prob:\n",
+    "        default_prob = pd.concat(default_prob, names=['series', 'name'], sort=True)\n",
+    "        default_prob.columns.name = 'tenor'\n",
+    "        gini_coeff = default_prob.stack().groupby(['series', 'tenor']).apply(gini)\n",
+    "        single_day_risk[d] = day_risk.merge(gini_coeff.rename('gini_coeff').reset_index(), on=['series', 'tenor'])\n",
+    "tranche_risk = pd.concat(single_day_risk, names=['date', 'idx'], sort=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "to_plot_gini = tranche_risk.loc[(tranche_risk.attach == 0) & (tranche_risk.tenor == '5yr')].groupby(['date', 'series']).nth(-1)\n",
+    "to_plot_gini.gini_coeff.unstack().plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import statsmodels.formula.api as smf\n",
+    "# modeling subset: rows with attach == 0, 5yr tenor, series 27 and later\n",
+    "equity = tranche_risk[(tranche_risk.attach == 0) & (tranche_risk.tenor == '5yr') & (tranche_risk.series >= 27)]\n",
+    "gini_formula = \"corr_at_detach ~ gini_coeff + duration + moneyness\"\n",
+    "gini_model = smf.gls(gini_formula, data=equity).fit()\n",
+    "gini_model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predict_today = equity.reset_index()[['gini_coeff', 'duration', 'moneyness']].iloc[[-1]]  # last row, kept as a one-row DataFrame for the formula API\n",
+    "gini_model.predict(predict_today)  # the model fitted above is gini_model; spread_gls_model is not defined here"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Let's use a GAM model instead? One spline term per regressor, same penalty grid for every term\n",
+    "from pygam import LinearGAM, s, f\n",
+    "X = equity[['gini_coeff', 'duration', 'moneyness']].values.copy()\n",
+    "y = equity['corr_at_detach'].values.copy()\n",
+    "\n",
+    "lam = np.logspace(-3, 5, 5, base=3)\n",
+    "lams = [lam, lam, lam]\n",
+    "gam_model = LinearGAM(s(0) + s(1) + s(2))\n",
+    "\n",
+    "gam_model.gridsearch(X, y, lam=lams)\n",
+    "gam_model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## plotting: partial dependence of each GAM term (solid) with its 95% interval (red dashed)\n",
+    "fig, axs = plt.subplots(1, 3)  # subplots creates its own figure, so no separate plt.figure() call\n",
+    "\n",
+    "titles = ['gini_coeff', 'duration', 'moneyness']\n",
+    "for i, (ax, title) in enumerate(zip(axs, titles)):\n",
+    "    XX = gam_model.generate_X_grid(term=i)\n",
+    "    pdep, confi = gam_model.partial_dependence(term=i, X=XX, width=.95)  # one call gives both curve and interval\n",
+    "    ax.plot(XX[:, i], pdep)\n",
+    "    ax.plot(XX[:, i], confi, c='r', ls='--')\n",
+    "    if i == 0:\n",
+    "        ax.set_ylim(-30, 30)\n",
+    "    ax.set_title(title);"
+   ]
   }
  ],
  "metadata": {
@@ -154,7 +303,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,