{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import datetime\n",
    "import exploration.dispersion as disp\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.formula.api as smf\n",
    "\n",
    "from analytics.basket_index import MarkitBasketIndex\n",
    "from analytics import on_the_run\n",
    "from statsmodels.graphics.regressionplots import plot_fit\n",
    "from pygam import LinearGAM, s, f, GAM\n",
    "from utils.db import dbengine\n",
    "\n",
    "# Database handle for 'serenitasdb'; later cells pass it to pd.read_sql_query.\n",
    "serenitas_engine = dbengine('serenitasdb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render matplotlib figures inline in the notebook output.\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Valuation date: the business day before today.\n",
    "# pd.datetime was deprecated in pandas 1.0 and removed in 2.0, so build it from pd.Timestamp.\n",
    "value_date = (pd.Timestamp.today() - pd.offsets.BDay(1)).date()\n",
    "# Index family and series number used by the cells below.\n",
    "index_type = 'HY'\n",
    "series = 33"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the members of the current series (df) and of the series `series_back` numbers\n",
    "# below it (df1), then attach each name's 5yr default probability.\n",
    "series_back = 2\n",
    "sql_string = \"select * from index_members(%s, %s)\"\n",
    "\n",
    "df = pd.read_sql_query(sql_string, serenitas_engine, params=(index_type + str(series), value_date), index_col=['markit_ticker'])\n",
    "df1 = pd.read_sql_query(sql_string, serenitas_engine, params=(index_type + str(series-series_back), value_date), index_col=['markit_ticker'])\n",
    "\n",
    "# Default probability = 1 - survival probability, one value per ticker and series.\n",
    "# The loop variable must not be named `s`: that would rebind pygam's spline term `s`\n",
    "# imported in the first cell and break the GAM cell further down on a fresh run.\n",
    "default_prob = {}\n",
    "for series_num in [series, series-series_back]:\n",
    "    index = MarkitBasketIndex(index_type, series_num, ['5yr'])\n",
    "    surv_prob, tickers = index.survival_matrix()\n",
    "    default_prob[series_num] = pd.Series(1 - np.ravel(surv_prob), index=tickers)\n",
    "default_prob = pd.concat(default_prob, names=['series', 'markit_ticker'])\n",
    "default_prob.name = 'default_prob'\n",
    "\n",
    "# Join on the ticker index; merge keeps only tickers present on both sides.\n",
    "df = df.merge(default_prob.loc[series], left_index=True, right_index = True)\n",
    "df1 = df1.merge(default_prob.loc[series-series_back], left_index=True, right_index = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Removals: tickers of the comparison series (df1) that are not in the current series (df)\n",
    "removed_tickers = df1.index.difference(df.index)\n",
    "df1.loc[removed_tickers]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Additions: tickers of the current series (df) that are not in the comparison series (df1)\n",
    "added_tickers = df.index.difference(df1.index)\n",
    "df.loc[added_tickers]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ten largest default probabilities in the current series, shown with its identifiers\n",
    "riskiest_current = df.nlargest(10, columns='default_prob')\n",
    "index_type, series, riskiest_current"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ten largest default probabilities in the comparison series, shown with its identifiers\n",
    "riskiest_previous = df1.nlargest(10, columns='default_prob')\n",
    "index_type, series-series_back, riskiest_previous"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# History of 5yr default probabilities for the current series:\n",
    "# one snapshot per business month-end, 12 snapshots ending at or before value_date.\n",
    "index = MarkitBasketIndex(index_type, series, ['5yr'])\n",
    "date_range = pd.bdate_range(end=value_date, periods=12, freq='1BM')\n",
    "\n",
    "monthly_default_prob = {}\n",
    "for as_of in date_range:\n",
    "    # Re-point the same index object at each date before asking for survival probabilities\n",
    "    index.value_date = as_of\n",
    "    survival, names = index.survival_matrix()\n",
    "    monthly_default_prob[as_of] = pd.Series(1 - np.ravel(survival), index=names)\n",
    "\n",
    "# Stack into one Series keyed by (date, ticker)\n",
    "default_prob = pd.concat(monthly_default_prob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Top 20 highest cumulative default probabilities on the latest available date\n",
    "# The dates in default_prob are business month-ends, so value_date itself is usually not\n",
    "# among them and default_prob[value_date] raises KeyError. Rank on the most recent\n",
    "# snapshot that is actually present instead.\n",
    "prob_by_date = default_prob.unstack(-1)\n",
    "latest_date = prob_by_date.index.max()\n",
    "top20 = prob_by_date[prob_by_date.loc[latest_date].nlargest(20).index]\n",
    "top20.index.name='date'\n",
    "top20.columns.name='tickers'\n",
    "ax = top20.plot(title=f'market implied default probabilities to {index.maturities[0]}', figsize=(10,6))\n",
    "# Put the legend to the right of the axes so it does not cover the lines\n",
    "ax.legend(loc='upper center', bbox_to_anchor=(1.3, 1), ncol=1)\n",
    "ax.set(xlabel='date', ylabel='probability')\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Default probabilities as of today for every tenor: one row per ticker, one column per tenor.\n",
    "# To look at another index, override index_type / series here first (e.g. 'IG' and 26).\n",
    "tenors = ['3yr', '5yr', '7yr', '10yr']\n",
    "indices = MarkitBasketIndex(index_type, series, tenors)\n",
    "indices.value_date = datetime.date.today()\n",
    "today_surv_prob, tickers = indices.survival_matrix()\n",
    "today_default_prob = 1 - pd.DataFrame(today_surv_prob, index=tickers, columns=tenors)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "####################### NAV Basis\n",
    "\n",
    "# How to read the sign of the basis computed below:\n",
    "#            HY              |           IG\n",
    "#+ve index trades risk rich  | index trades risk cheap\n",
    "#-ve single trades risk rich | single trades risk cheap\n",
    "\n",
    "sql_string = \"select * from index_quotes where index = %s and tenor = '5yr'\"\n",
    "df = pd.read_sql_query(sql_string, serenitas_engine, params=(index_type,), index_col=['date'])\n",
    "\n",
    "\n",
    "def _gap_to_highest_series(series_numbers):\n",
    "    \"\"\"Return, for one date, the highest series number minus each row's series number.\"\"\"\n",
    "    return series_numbers.max() - series_numbers\n",
    "\n",
    "\n",
    "df['dist_on_the_run'] = df.groupby('date')['series'].transform(_gap_to_highest_series)\n",
    "\n",
    "# Keep only the last row of each (date, series) group, i.e. the last version\n",
    "df = df.groupby(['date', 'series']).nth(-1)\n",
    "\n",
    "# IG is compared on spread, every other index type on price\n",
    "if index_type == 'IG':\n",
    "    df['basis'] = df.closespread - df.modelspread\n",
    "else:\n",
    "    df['basis'] = df.closeprice - df.modelprice\n",
    "\n",
    "# Move dist_on_the_run into the index and series back into the columns\n",
    "df = df.set_index('dist_on_the_run', append=True).reset_index('series')\n",
    "basis = df['basis'].unstack()\n",
    "\n",
    "# One row per statistic, one column per dist_on_the_run value\n",
    "summary_rows = {\n",
    "    'min': basis.min(),\n",
    "    'mean': basis.mean(),\n",
    "    'max': basis.max(),\n",
    "    '1%tile': basis.quantile(.01),\n",
    "    '5%tile': basis.quantile(.05),\n",
    "    '95%tile': basis.quantile(.95),\n",
    "    '99%tile': basis.quantile(.99),\n",
    "}\n",
    "stats = pd.DataFrame(list(summary_rows.values()), index=list(summary_rows))\n",
    "stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "####################### Gini / spread stdev for every series, from 2019-01-01 onwards\n",
    "# The calculation fails often, so only rows that are already complete (dropna) are used.\n",
    "# NOTE(review): get_gini_spreadstdev is neither defined nor imported in this notebook;\n",
    "# presumably it comes from exploration.dispersion (imported as disp) - confirm before running.\n",
    "gini_input_cols = ['index', 'series', 'tenor', 'duration', 'basis', 'closespread']\n",
    "complete_rows = df.dropna().loc[datetime.date(2019,1,1):, :]\n",
    "gini_inputs = complete_rows.reset_index('dist_on_the_run')[gini_input_cols]\n",
    "\n",
    "# Each row yields a pair of values, expanded into two named columns\n",
    "gini_pairs = gini_inputs.apply(get_gini_spreadstdev, axis=1)\n",
    "gini_stats = pd.DataFrame(gini_pairs.values.tolist(), columns=['gini_spread','std_spread'], index=gini_pairs.index)\n",
    "\n",
    "df_gini_calc = pd.concat([gini_inputs, gini_stats], axis=1).dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "####################### Gini / spread stdev using only the last row of each date (on the run)\n",
    "# Same calculation as the all-series version; running this cell replaces df_gini_calc.\n",
    "# NOTE(review): get_gini_spreadstdev is neither defined nor imported in this notebook;\n",
    "# presumably it comes from exploration.dispersion (imported as disp) - confirm before running.\n",
    "gini_input_cols = ['index', 'series', 'tenor', 'duration', 'basis', 'closespread']\n",
    "last_row_per_date = df.groupby(['date']).nth(-1).dropna()\n",
    "gini_inputs = last_row_per_date[gini_input_cols]\n",
    "\n",
    "# Each row yields a pair of values, expanded into two named columns\n",
    "gini_pairs = gini_inputs.apply(get_gini_spreadstdev, axis=1)\n",
    "gini_stats = pd.DataFrame(gini_pairs.values.tolist(), columns=['gini_spread','std_spread'], index=gini_pairs.index)\n",
    "\n",
    "df_gini_calc = pd.concat([gini_inputs, gini_stats], axis=1).dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "####################### Model the NAV basis from duration, spread and gini\n",
    "# OLS alternative (disabled), kept for reference:\n",
    "#   smf.ols('basis ~ np.log(duration) + np.log(closespread) + np.log(gini_spread)', data=df_gini_calc).fit()\n",
    "\n",
    "# Current approach: a GAM with one concave spline term per feature column\n",
    "feature_cols = ['duration', 'closespread', 'gini_spread']\n",
    "X = df_gini_calc[feature_cols].values\n",
    "y = df_gini_calc[['basis']].values\n",
    "\n",
    "spline_terms = (\n",
    "    s(0, constraints='concave')\n",
    "    + s(1, constraints='concave')\n",
    "    + s(2, constraints='concave')\n",
    ")\n",
    "basis_model = GAM(spline_terms)\n",
    "\n",
    "# The same 5-point log-spaced grid of lam values (1e-3 .. 1e5) is searched for each of the 3 terms\n",
    "lam = np.logspace(-3, 5, num=5, base=10)\n",
    "lams = [lam, lam, lam]\n",
    "\n",
    "basis_model.gridsearch(X, y, lam=lams)\n",
    "basis_model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## plotting: partial dependence of the basis on each feature, with its 95% band\n",
    "fig, axs = plt.subplots(1,3);\n",
    "\n",
    "titles = ['duration', 'closespread', 'gini_spread']\n",
    "for i, ax in enumerate(axs):\n",
    "    XX = basis_model.generate_X_grid(term=i)\n",
    "    # A single call with width returns both the curve and its confidence band,\n",
    "    # so the partial dependence is not computed twice per term.\n",
    "    pdep, confi = basis_model.partial_dependence(term=i, X=XX, width=.95)\n",
    "    ax.plot(XX[:, i], pdep)\n",
    "    ax.plot(XX[:, i], confi, c='r', ls='--')\n",
    "    if i == 0:\n",
    "        # Fixed y-range for the first panel (duration) only\n",
    "        ax.set_ylim(-30,30)\n",
    "    ax.set_title(titles[i]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "############## predict\n",
    "# In-sample check: model predictions on the same X the model was fitted on,\n",
    "# plotted against the observed basis.\n",
    "predict = basis_model.predict(X)\n",
    "plt.scatter(y, predict)\n",
    "plt.xlabel('actual basis')\n",
    "plt.ylabel('predicted basis')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "############## today's basis\n",
    "# Last observation in the fitting data: (actual basis, model-predicted basis)\n",
    "y[-1], predict[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## BBs \n",
    "# Spread history of the on-the-run HYBB index: one point every 5 business days,\n",
    "# 520 points (about 10 years) ending at value_date.\n",
    "date_range = pd.bdate_range(end=value_date, freq='5B',periods=52*10)\n",
    "index_spreads = {}\n",
    "skipped_dates = {}\n",
    "# NOTE: this rebinds the notebook-level index_type; re-run the parameter cell before re-running earlier cells.\n",
    "index_type = 'HYBB'\n",
    "for d in date_range:\n",
    "    try:\n",
    "        index = MarkitBasketIndex(index_type, on_the_run(index_type, d), ['5yr'], value_date =d)\n",
    "        index_spreads[d] = index.spread()\n",
    "    except Exception as exc:\n",
    "        # Best effort: a date that cannot be built or priced is skipped but remembered.\n",
    "        # Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.\n",
    "        skipped_dates[d] = exc\n",
    "if skipped_dates:\n",
    "    print(f'skipped {len(skipped_dates)} of {len(date_range)} dates; see skipped_dates for the errors')\n",
    "if not index_spreads:\n",
    "    raise ValueError(f'no {index_type} spread could be computed for any of the {len(date_range)} dates')\n",
    "index_spreads = pd.concat(index_spreads)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}