{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import datetime\n", "\n", "from analytics.basket_index import MarkitBasketIndex\n", "from analytics import on_the_run\n", "import matplotlib.pyplot as plt\n", "\n", "from utils.db import dbengine\n", "serenitas_engine = dbengine('serenitasdb')\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "value_date = (pd.datetime.today() - pd.offsets.BDay(1)).date()\n", "index_type = 'HY'\n", "series = 32" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sql_string = \"select * from index_members(%s, %s)\"\n", "\n", "df = pd.read_sql_query(sql_string, serenitas_engine, params=(index_type + str(series), value_date), index_col=['markit_ticker'])\n", "df1 = pd.read_sql_query(sql_string, serenitas_engine, params=(index_type + str(series-2), value_date), index_col=['markit_ticker'])\n", "\n", "default_prob = {}\n", "for s in [series, series-2]:\n", " index = MarkitBasketIndex(index_type, s, ['5yr'])\n", " surv_prob, tickers = index.survival_matrix()\n", " default_prob[s] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n", "default_prob = pd.concat(default_prob, names=['series', 'markit_ticker'])\n", "default_prob.name = 'default_prob'\n", "\n", "df = df.merge(default_prob.loc[series], left_index=True, right_index = True)\n", "df1 = df1.merge(default_prob.loc[series-2], left_index=True, right_index = True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Removals\n", "df1.loc[df1.index.difference(df.index)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Additions\n", "df.loc[df.index.difference(df1.index)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"date_range = pd.bdate_range(value_date - 52 * .5 * pd.offsets.Week(), value_date, freq='5B')\n", "index = MarkitBasketIndex(index_type, series, ['5yr'])\n", "default_prob = {}\n", "for d in date_range:\n", " index.value_date = d\n", " surv_prob, tickers = index.survival_matrix()\n", " default_prob[d] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n", "default_prob = pd.concat(default_prob)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Top 20 highest cumulative\n", "top20 = default_prob.unstack(-1)[default_prob[value_date].nlargest(20).index]\n", "top20.index.name='date'\n", "top20.columns.name='tickers'\n", "ax = top20.plot(title=f'market implied default probabilities to {index.maturities[0]}', figsize=(10,6))\n", "ax.legend(loc='upper center', bbox_to_anchor=(1.3, 1), ncol=1)\n", "ax.set(xlabel='date', ylabel='probability')\n", "plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tenors = ['3yr', '5yr', '7yr', '10yr']\n", "#index_type = 'IG'\n", "#series = 26\n", "indices = MarkitBasketIndex(index_type, series, tenors)\n", "indices.value_date = datetime.date.today()\n", "today_surv_prob, tickers = indices.survival_matrix()\n", "today_default_prob = pd.DataFrame(1 - today_surv_prob, index=tickers, columns=tenors)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Dispersion: std_dev of default_prob/average default_prob\n", "date_range = pd.bdate_range(value_date - 52 * 4 * pd.offsets.Week(), value_date, freq='5B')\n", "default_prob = {}\n", "ontr = on_the_run(index_type, date_range[0])\n", "index = MarkitBasketIndex(index_type, ontr, ['5yr'])\n", "for d in date_range:\n", " if ontr != on_the_run(index_type, d):\n", " ontr = on_the_run(index_type, d)\n", " index = MarkitBasketIndex(index_type, ontr, ['5yr'])\n", " try:\n", " index.value_date = d\n", " surv_prob, tickers = 
index.survival_matrix()\n", " default_prob[d] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n", " except Exception:\n", " continue\n", "default_prob = pd.concat(default_prob)\n", "dispersion = default_prob.unstack(level=0)\n", "dispersion = dispersion.std()/dispersion.mean()\n", "dispersion.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def gini(array):\n", " \"\"\"Calculate the Gini coefficient of a numpy array.\"\"\"\n", " array = array.values\n", " # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif\n", " # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm\n", " if np.amin(array) < 0:\n", " array -= np.amin(array) #values cannot be negative\n", " array += 0.0000001 #values cannot be 0\n", " array = np.sort(array) #values must be sorted\n", " index = np.arange(1,array.shape[0]+1) #index per array element\n", " n = array.shape[0]#number of array elements\n", " return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array))) #Gini coefficient" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Instead of std dev of spread get Gini Factor default prob\n", "sql_string = \"select * from index_version where index = %s\"\n", "idx_ver = pd.read_sql_query(sql_string, serenitas_engine, params=[index_type,], parse_dates=['lastdate'])\n", "idx_ver['date'] = pd.to_datetime([d.strftime('%Y-%m-%d') if not pd.isnull(d) else datetime.date(2050,1,1) for d in idx_ver['lastdate']])\n", "sql_string = \"select * from risk_numbers where index = %s\"\n", "risk = pd.read_sql_query(sql_string, serenitas_engine, parse_dates={'date': {'utc':True}}, params=[index_type])\n", "risk.date = risk.date.dt.normalize().dt.tz_convert(None)\n", "risk = risk.groupby(['date','index','series','tenor','attach']).mean()\n", "risk.reset_index(inplace=True)\n", "idx_ver.sort_values(by=['date'], inplace=True, ascending=True)\n", "risk = 
pd.merge_asof(risk, idx_ver[['date','series','cumulativeloss','indexfactor']], left_on=['date'], right_on=['date'], by='series', direction='forward')\n", "risk.set_index('date', inplace=True) \n", "risk['moneyness'] = risk.apply(lambda df: (df.detach-df.cumulativeloss)/df.indexfactor/df.index_expected_loss, axis=1)\n", "\n", "single_day_risk = {}\n", "date_range = pd.bdate_range(value_date - 52 * 5 * pd.offsets.Week(), value_date, freq='5B')\n", "for d in date_range:\n", " default_prob={}\n", " try:\n", " df = risk.loc[d]\n", " except KeyError:\n", " continue\n", " for s in df.series.unique():\n", " tenors = list(df[df.series==s]['tenor'].sort_values().unique())\n", " indices = MarkitBasketIndex(index_type, s, tenors)\n", " try:\n", " indices.value_date = d\n", " surv_prob, tickers = indices.survival_matrix()\n", " default_prob[s] = pd.DataFrame(1 - surv_prob, index=tickers, columns=tenors)\n", " except Exception:\n", " continue\n", " if default_prob:\n", " default_prob = pd.concat(default_prob, names=['series', 'name'], sort=True)\n", " default_prob.columns.name = 'tenor'\n", " gini_coeff = default_prob.stack().groupby(['series', 'tenor']).apply(gini)\n", " single_day_risk[d] = df.merge(gini_coeff.rename('gini_coeff').reset_index(), on=['series', 'tenor'])\n", "tranche_risk = pd.concat(single_day_risk, names=['date', 'idx'], sort=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "to_plot_gini = tranche_risk[(tranche_risk.tenor == '5yr') & (tranche_risk.attach ==0)].groupby(['date', 'series']).nth(-1)\n", "to_plot_gini['gini_coeff'].unstack().plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import statsmodels.formula.api as smf\n", "equity = tranche_risk[tranche_risk.attach==0]\n", "#use a subset for modeling purposes?\n", "equity = equity[(equity.tenor=='5yr') & (equity.series >= 27)]\n", "gini_model = smf.gls(\"corr_at_detach ~ gini_coeff + duration + moneyness\", 
data=equity).fit()\n", "gini_model.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predict_today = equity.reset_index()[['gini_coeff', 'duration', 'moneyness']].iloc[-1]\n", "gini_model.predict(predict_today)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Let's use a GAM model instead?\n", "from pygam import LinearGAM, s, f\n", "X = np.array(equity[['gini_coeff', 'duration', 'moneyness']])\n", "y = np.array(equity['corr_at_detach'])\n", "\n", "gam_model = LinearGAM(s(0) + s(1) + s(2))\n", "lam = np.logspace(-3, 5, 5, base=3)\n", "lams = [lam] * 3\n", "\n", "gam_model.gridsearch(X, y, lam=lams)\n", "gam_model.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## plotting\n", "plt.figure();\n", "fig, axs = plt.subplots(1,3);\n", "\n", "titles = ['gini_coeff', 'duration', 'moneyness']\n", "for i, ax in enumerate(axs):\n", " XX = gam_model.generate_X_grid(term=i)\n", " ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX))\n", " ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX, width=.95)[1], c='r', ls='--')\n", " if i == 0:\n", " ax.set_ylim(-30,30)\n", " ax.set_title(titles[i]);" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }