aboutsummaryrefslogtreecommitdiffstats
path: root/python/notebooks/Single Names Monitoring.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'python/notebooks/Single Names Monitoring.ipynb')
-rw-r--r--python/notebooks/Single Names Monitoring.ipynb280
1 files changed, 208 insertions, 72 deletions
diff --git a/python/notebooks/Single Names Monitoring.ipynb b/python/notebooks/Single Names Monitoring.ipynb
index 8b734db9..46230799 100644
--- a/python/notebooks/Single Names Monitoring.ipynb
+++ b/python/notebooks/Single Names Monitoring.ipynb
@@ -13,10 +13,19 @@
"from analytics.basket_index import MarkitBasketIndex\n",
"from analytics import on_the_run\n",
"import matplotlib.pyplot as plt\n",
+ "import statsmodels.formula.api as smf\n",
+ "from pygam import LinearGAM, s, f, GAM\n",
"\n",
"from utils.db import dbengine\n",
- "serenitas_engine = dbengine('serenitasdb')\n",
- "\n",
+ "serenitas_engine = dbengine('serenitasdb')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
"%matplotlib inline"
]
},
@@ -27,8 +36,8 @@
"outputs": [],
"source": [
"value_date = (pd.datetime.today() - pd.offsets.BDay(1)).date()\n",
- "index_type = 'HY'\n",
- "series = 32"
+ "index_type = 'XO'\n",
+ "series = 28"
]
},
{
@@ -127,36 +136,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#Dispersion: std_dev of default_prob/average default_prob\n",
- "date_range = pd.bdate_range(value_date - 52 * 4 * pd.offsets.Week(), value_date, freq='5B')\n",
- "default_prob = {}\n",
- "ontr = on_the_run(index_type, date_range[0])\n",
- "index = MarkitBasketIndex(index_type, ontr, ['5yr'])\n",
- "for d in date_range:\n",
- " if ontr != on_the_run(index_type, d):\n",
- " ontr = on_the_run(index_type, d)\n",
- " index = MarkitBasketIndex(index_type, ontr, ['5yr'])\n",
- " try:\n",
- " index.value_date = d\n",
- " surv_prob, tickers = index.survival_matrix()\n",
- " default_prob[d] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n",
- " except:\n",
- " continue\n",
- "default_prob = pd.concat(default_prob)\n",
- "dispersion = default_prob.unstack(level=0)\n",
- "dispersion = dispersion.std()/dispersion.mean()\n",
- "dispersion.plot()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
"def gini(array):\n",
" \"\"\"Calculate the Gini coefficient of a numpy array.\"\"\"\n",
- " array = array.values\n",
" # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif\n",
" # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm\n",
" if np.amin(array) < 0:\n",
@@ -165,7 +146,7 @@
" array = np.sort(array) #values must be sorted\n",
" index = np.arange(1,array.shape[0]+1) #index per array element\n",
" n = array.shape[0]#number of array elements\n",
- " return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array))) #Gini coefficient"
+ " return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array))) "
]
},
{
@@ -174,7 +155,159 @@
"metadata": {},
"outputs": [],
"source": [
- "#Instead of std dev of spread get Gini Factor default prob\n",
+ "def get_gini_spreadstdev(row):\n",
+ "    \"\"\"Return (gini, std dev) of the constituent spreads below 1 for the index/series/tenor in `row`, valued on `row.name`.\"\"\"\n",
+ "    all_spreads = MarkitBasketIndex(row['index'], row.series, [row.tenor], value_date = row.name).spreads()\n",
+ "    kept = all_spreads[all_spreads<1]\n",
+ "    return (gini(kept), np.std(kept))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "####################### NAV Basis\n",
+ "\n",
+ "# HY | IG\n",
+ "#+ve index trades risk rich | index trades risk cheap\n",
+ "#-ve single trades risk rich | single trades risk cheap\n",
+ "\n",
+ "sql_string = \"select * from index_quotes where index = %s and tenor = '5yr'\"\n",
+ "df = pd.read_sql_query(sql_string, serenitas_engine, params=(index_type,), index_col=['date'])\n",
+ "df[\"dist_on_the_run\"] = df.groupby(\"date\")[\"series\"].transform(\n",
+ " lambda x: x.max() - x\n",
+ ")\n",
+ "df = df.groupby(['date', 'series']).nth(-1) #take the last version\n",
+ "df['basis'] = df.closespread - df.modelspread if index_type == 'IG' else df.closeprice - df.modelprice\n",
+ "df.set_index('dist_on_the_run', append=True, inplace=True)\n",
+ "df.reset_index('series', inplace=True)\n",
+ "basis = df['basis'].unstack()\n",
+ "stats = pd.DataFrame([basis.min(), basis.mean(), basis.max(), \n",
+ " basis.quantile(.01), basis.quantile(.05), basis.quantile(.95), basis.quantile(.99)],\n",
+ " index=['min', 'mean', 'max', \n",
+ " '1%tile', '5%tile', '95%tile', '99%tile'])\n",
+ "stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "####################### Get Gini on indices: this calc bombs a lot so let's do the ones that we were able to calc before (dropna)\n",
+ "df_gini_calc = df.dropna().loc[datetime.date(2019,1,1):, :].reset_index('dist_on_the_run')[\n",
+ "    ['index','series', 'tenor', 'duration', 'basis', 'closespread']]\n",
+ "temp = df_gini_calc.apply(get_gini_spreadstdev, axis=1)\n",
+ "temp = pd.DataFrame(temp.values.tolist(), columns=['gini_spread','std_spread'], index=temp.index)\n",
+ "df_gini_calc = df_gini_calc.assign(gini_spread=temp['gini_spread'].values, std_spread=temp['std_spread'].values).dropna() #positional: the date index repeats per series, so an index merge would cross-join rows within a date"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#######################GLS regression of NAV basis to spread/duration\n",
+ "#basis_gini_model = smf.gls(\"basis ~ np.log(duration) + np.log(closespread) + np.log(gini_spread)\", data=df_gini_calc).fit()\n",
+ "#basis_gini_model.summary()\n",
+ "\n",
+ "#Let's use a GAM model instead?\n",
+ "X = np.array(df_gini_calc[['duration', 'closespread', 'gini_spread']])\n",
+ "y = np.array(df_gini_calc[['basis']])\n",
+ "\n",
+ "basis_model = GAM(s(0, constraints='concave') +\n",
+ " s(1, constraints='concave') +\n",
+ " s(2, constraints='concave'))\n",
+ "\n",
+ "lam = np.logspace(-3, 5, 5, base=10)\n",
+ "lams = [lam] * 3\n",
+ "\n",
+ "basis_model.gridsearch(X, y, lam=lams)\n",
+ "basis_model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## plot each term's partial dependence (red dashed: 95% confidence bounds)\n",
+ "fig, axs = plt.subplots(1,3);\n",
+ "\n",
+ "titles = ['duration', 'closespread', 'gini_spread']\n",
+ "for i, ax in enumerate(axs):\n",
+ "    XX = basis_model.generate_X_grid(term=i)\n",
+ "    ax.plot(XX[:, i], basis_model.partial_dependence(term=i, X=XX))\n",
+ "    ax.plot(XX[:, i], basis_model.partial_dependence(term=i, X=XX, width=.95)[1], c='r', ls='--')\n",
+ "    if i == 0:\n",
+ "        ax.set_ylim(-30,30)\n",
+ "    ax.set_title(titles[i]);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "############## predict\n",
+ "predict = basis_model.predict(X)\n",
+ "plt.scatter(y, predict)\n",
+ "plt.xlabel('actual basis')\n",
+ "plt.ylabel('predicted basis')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "############## today's basis\n",
+ "y[-1], predict[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#######################Dispersion: std_dev/mean of default_prob\n",
+ "date_range = pd.bdate_range(value_date - 52 * 4 * pd.offsets.Week(), value_date, freq='5B')\n",
+ "default_prob, index_spreads = {}, {}\n",
+ "for d in date_range:\n",
+ "    try:\n",
+ "        index = MarkitBasketIndex(index_type, on_the_run(index_type, d), ['5yr'], value_date =d)\n",
+ "        surv_prob, tickers = index.survival_matrix()\n",
+ "        #label spreads by ticker before filtering so a dropped name cannot cause a length mismatch\n",
+ "        spreads = pd.Series(index.spreads(), index=tickers)\n",
+ "        default_prob[d] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n",
+ "        index_spreads[d] = spreads[spreads<1] #filter out crazy spreads\n",
+ "    except Exception: #best effort: skip dates that fail, but let KeyboardInterrupt propagate\n",
+ "        continue\n",
+ "default_prob = pd.concat(default_prob)\n",
+ "index_spreads = pd.concat(index_spreads)\n",
+ "dispersion = default_prob.unstack(level=0)\n",
+ "dispersion = dispersion.std()/dispersion.mean()\n",
+ "dispersion_spread = index_spreads.unstack(level=0)\n",
+ "dispersion_spread = dispersion_spread.std()/dispersion_spread.mean()\n",
+ "dispersion.plot()\n",
+ "dispersion_spread.plot()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Get Gini factor\n",
"sql_string = \"select * from index_version where index = %s\"\n",
"idx_ver = pd.read_sql_query(sql_string, serenitas_engine, params=[index_type,], parse_dates=['lastdate'])\n",
"idx_ver['date'] = pd.to_datetime([d.strftime('%Y-%m-%d') if not pd.isnull(d) else datetime.date(2050,1,1) for d in idx_ver['lastdate']])\n",
@@ -188,29 +321,10 @@
"risk.set_index('date', inplace=True) \n",
"risk['moneyness'] = risk.apply(lambda df: (df.detach-df.cumulativeloss)/df.indexfactor/df.index_expected_loss, axis=1)\n",
"\n",
- "single_day_risk = {}\n",
- "date_range = pd.bdate_range(value_date - 52 * 5 * pd.offsets.Week(), value_date, freq='5B')\n",
- "for d in date_range:\n",
- " default_prob={}\n",
- " try:\n",
- " df = risk.loc[d]\n",
- " except:\n",
- " continue\n",
- " for s in df.series.unique():\n",
- " tenors = list(df[df.series==s]['tenor'].sort_values().unique())\n",
- " indices = MarkitBasketIndex(index_type, s, tenors)\n",
- " try:\n",
- " indices.value_date = d\n",
- " surv_prob, tickers = indices.survival_matrix()\n",
- " default_prob[s] = pd.DataFrame(1 - surv_prob, index=tickers, columns=tenors)\n",
- " except:\n",
- " continue\n",
- " if default_prob:\n",
- " default_prob = pd.concat(default_prob, names=['series', 'name'], sort=True)\n",
- " default_prob.columns.name = 'tenor'\n",
- " gini_coeff = default_prob.stack().groupby(['series', 'tenor']).apply(gini)\n",
- " single_day_risk[d] = df.merge(gini_coeff.rename('gini_coeff').reset_index(), on=['series', 'tenor'])\n",
- "tranche_risk = pd.concat(single_day_risk, names=['date', 'idx'], sort=True)"
+ "date_range = pd.bdate_range(value_date - 52 * 3 * pd.offsets.Week(), value_date, freq='5B')\n",
+ "gini_calc = risk[(risk.index.isin(date_range)) & (risk.attach == 0)].copy() #copy: columns are added below, so do not write into a slice of risk\n",
+ "temp = gini_calc.apply(get_gini_spreadstdev, axis=1)\n",
+ "gini_calc[['gini_spread', 'std_spread']] = pd.DataFrame(temp.values.tolist(), columns=['gini_spread','std_spread'], index=temp.index)"
]
},
{
@@ -219,8 +333,8 @@
"metadata": {},
"outputs": [],
"source": [
- "to_plot_gini = tranche_risk[(tranche_risk.tenor == '5yr') & (tranche_risk.attach ==0)].groupby(['date', 'series']).nth(-1)\n",
- "to_plot_gini['gini_coeff'].unstack().plot()"
+ "to_plot_gini = gini_calc[(gini_calc.tenor == '5yr')].groupby(['date', 'series']).nth(-1)\n",
+ "to_plot_gini['gini_spread'].unstack().plot()"
]
},
{
@@ -229,11 +343,7 @@
"metadata": {},
"outputs": [],
"source": [
- "import statsmodels.formula.api as smf\n",
- "equity = tranche_risk[tranche_risk.attach==0]\n",
- "#use a subset for modeling purposes?\n",
- "equity = equity[(equity.tenor=='5yr') & (equity.series >= 27)]\n",
- "gini_model = smf.gls(\"corr_at_detach ~ gini_coeff + duration + moneyness\", data=equity).fit()\n",
+ "gini_model = smf.gls(\"corr_at_detach ~ gini_spread + duration + moneyness\", data=gini_calc[(gini_calc.tenor=='5yr') & (gini_calc.series >= 23)]).fit() #filter inlined: `equity` is only defined in a later cell\n",
"gini_model.summary()"
]
},
@@ -243,8 +353,8 @@
"metadata": {},
"outputs": [],
"source": [
- "predict_today = equity.reset_index()[['gini_coeff', 'duration', 'moneyness']].iloc[-1]\n",
- "spread_gls_model.predict(predict_today)"
+ "predict_today = gini_calc[(gini_calc.tenor=='5yr') & (gini_calc.series >= 23)].reset_index()[['gini_spread', 'duration', 'moneyness']].iloc[-1] #filter inlined: `equity` is only defined in a later cell\n",
+ "gini_model.predict(predict_today)"
]
},
{
@@ -254,15 +364,19 @@
"outputs": [],
"source": [
"#Let's use a GAM model instead?\n",
- "from pygam import LinearGAM, s, f\n",
- "X = np.array(equity[['gini_coeff', 'duration', 'moneyness']])\n",
+ "#only use the 5yr point for modeling\n",
+ "equity = gini_calc[(gini_calc.tenor=='5yr') & (gini_calc.series >= 23)]\n",
+ "X = np.array(equity[['gini_spread', 'duration', 'moneyness']])\n",
"y = np.array(equity['corr_at_detach'])\n",
"\n",
- "gam_model = LinearGAM(s(0) + s(1) + s(2))\n",
+ "#Grid-search the smoothing penalty (lambda) for each term\n",
+ "gam_model = GAM(s(0, n_splines=5) +\n",
+ " s(1, n_splines=5) +\n",
+ " s(2, n_splines=5))\n",
"lam = np.logspace(-3, 5, 5, base=3)\n",
"lams = [lam] * 3\n",
- "\n",
"gam_model.gridsearch(X, y, lam=lams)\n",
+ "\n",
"gam_model.summary()"
]
},
@@ -273,10 +387,9 @@
"outputs": [],
"source": [
"## plotting\n",
- "plt.figure();\n",
"fig, axs = plt.subplots(1,3);\n",
"\n",
- "titles = ['gini_coeff', 'duration', 'moneyness']\n",
+ "titles = ['gini_spread', 'duration', 'moneyness']\n",
"for i, ax in enumerate(axs):\n",
" XX = gam_model.generate_X_grid(term=i)\n",
" ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX))\n",
@@ -285,6 +398,29 @@
" ax.set_ylim(-30,30)\n",
" ax.set_title(titles[i]);"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "predict = gam_model.predict(X)\n",
+ "plt.scatter(y, predict)\n",
+ "plt.xlabel('actual correlation')\n",
+ "plt.ylabel('predicted correlation')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "today = (equity.loc[max(equity.index)])\n",
+ "predict_HY31 = gam_model.predict(np.array(today[today.series==31][['gini_spread', 'duration', 'moneyness']]))\n",
+ "today[today.series==31][['corr_at_detach']], predict_HY31"
+ ]
}
],
"metadata": {