{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import itertools\n",
    "import datetime\n",
    "import exploration.dispersion as disp\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.formula.api as smf\n",
    "import analytics.tranche_data as tdata\n",
    "\n",
    "from analytics.basket_index import MarkitBasketIndex\n",
    "from analytics import on_the_run\n",
    "from statsmodels.graphics.regressionplots import plot_fit\n",
    "from pygam import LinearGAM, s, f, GAM\n",
    "from utils.db import dbengine, dbconn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
    "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
    "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
    "end = datetime.datetime.today()\n",
    "index_type = 'IG'\n",
    "serenitasconn = dbconn(\"serenitasdb\")\n",
    "serenitasconn.autocommit = True\n",
    "risk = disp.get_tranche_data(serenitasconn, index_type)\n",
    "train_data = risk[start: end]\n",
    "gini_calc, gini_model = disp.create_models(serenitasconn, train_data)\n",
    "gini_model.fit().summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gini_calc.xs(31, level = 'series')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#use trained model to fit rest of the data\n",
    "f = gini_model.fit()\n",
    "risk.loc[risk.index.get_level_values(\"attach\") != attach_max, \"predict\"] = expit(f.predict(bottom_stack))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Plot Gini if (use gini=True, use_log=False)\n",
    "to_plot_gini = gini_calc.xs(0, level='attach').groupby(['date', 'series']).nth(-1)\n",
    "to_plot_gini['gini'].unstack().plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#last day: mispricing\n",
    "today = gini_calc.xs([value_date,33], level=['date','series'])\n",
    "today[['exp_percentage', 'predict', 'mispricing']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#plot mispricing of a tranche through time \n",
    "attach = 0\n",
    "series = 33\n",
    "to_plot = gini_calc.xs([attach, series], level=['attach', 'series'])['mispricing']\n",
    "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gini_calc.xs([attach, series], level=['attach', 'series']).to_clipboard()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#plot mispricing of series through time \n",
    "series = 33\n",
    "to_plot = gini_calc.xs(series, level='series')['mispricing']\n",
    "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_fit(gini_model[0], 'np.log(index_duration)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "to_csv('/home/serenitas/edwin/Python/temp1.csv')\n",
    "\n",
    "gini_calc.to_csv('/home/serenitas/edwin/Python/' + index_type+ '_tranche_model.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#compare to realized delta-adjusted return\n",
    "tranche_returns = tdata.get_tranche_quotes(index=index_type)\n",
    "tranche_returns = tdata.tranche_returns(df=tranche_returns)\n",
    "attach = 0\n",
    "t = tranche_returns['delhedged_return'].reset_index(['index', 'tenor'], drop=True).xs(attach, level='attach')\n",
    "temp={}\n",
    "for i,g in t.groupby('series'):\n",
    "    temp[i] = (g.dropna()+1).cumprod()\n",
    "t = pd.concat(temp).reset_index(0, drop=True)\n",
    "t.unstack(level='series').plot()\n",
    "tranche_returns.to_csv('/home/serenitas/edwin/Python/temp3.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "attach = 0\n",
    "\n",
    "returns = tranche_returns.xs(['HY', 29, '5yr', attach], level = ['index', 'series', 'tenor','attach'])['delhedged_return']\n",
    "model = gini_calc.xs(['HY', 29, '5yr', attach], level = ['index', 'series', 'tenor','attach'])['mispricing']\n",
    "returns = pd.merge(returns, model, left_index=True, right_index=True)\n",
    "model_verification = smf.ols(\"delhedged_return ~ mispricing \", data=returns).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tranche_returns.xs(29, level='series').unstack(level='attach').to_csv('/home/serenitas/edwin/Python/temp1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#compare models\n",
    "a = [True, False]\n",
    "for years in [1,2,3,4,5,6]:\n",
    "    date_range = pd.bdate_range(end=value_date, freq='5B',periods=52*years)\n",
    "    risk = disp.get_tranche_data(index_type, serenitas_engine)\n",
    "    risk = risk[risk.index.get_level_values(0).isin(date_range)]\n",
    "    for x, y in list(itertools.product(a,a)):\n",
    "        gini_model, gini_calc = disp.create_models(risk, use_gini=x, use_log=y)\n",
    "        for i, m in gini_model.items():\n",
    "            print (years, x, y, i, m.rsquared)\n",
    "        today = gini_calc.xs([value_date,33], level=['date','series'])\n",
    "        print (today[['exp_percentage', 'predict_N', 'predict_preN', 'mispricing']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Run a particular gini scenario\n",
    "scenario = gini_calc.loc(axis=0)[value_date,33,'HY','5yr',0]\n",
    "scenario['dispersion'] = .6\n",
    "scenario_disp = np.exp(gini_model[0].predict(scenario))\n",
    "mispricing = (scenario['exp_percentage'] - scenario_disp) * \\\n",
    "             scenario['index_expected_loss'] / \\\n",
    "             (scenario['detach_adj'] - scenario['attach_adj']) / \\\n",
    "             scenario['indexfactor'] * 10000\n",
    "mispricing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gini_calc.loc(axis=0)[:,33,'HY','5yr',0]['mispricing']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Let's use a GAM model instead?\n",
    "#only use the 5yr point for modeling\n",
    "equity = gini_calc.loc(axis=0)[:,:,[25,27,29,31,33],'5yr',0]\n",
    "X = np.array(equity[['gini_spread', 'duration', 'moneyness']])\n",
    "y = np.array(equity['exp_percentage'])\n",
    "\n",
    "#Fit for Lamda\n",
    "gam_model = GAM(s(0, n_splines=5) +\n",
    "                  s(1, n_splines=5) +\n",
    "                  s(2, n_splines=5))\n",
    "lam = np.logspace(-3, 5, 5, base=3)\n",
    "lams = [lam] * 3\n",
    "gam_model.gridsearch(X, y, lam=lams)\n",
    "\n",
    "gam_model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## plotting\n",
    "fig, axs = plt.subplots(1,3);\n",
    "\n",
    "titles = ['gini_spread', 'duration', 'moneyness']\n",
    "for i, ax in enumerate(axs):\n",
    "    XX = gam_model.generate_X_grid(term=i)\n",
    "    ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX))\n",
    "    ax.plot(XX[:, i], gam_model.partial_dependence(term=i, X=XX, width=.95)[1], c='r', ls='--')\n",
    "    if i == 0:\n",
    "        ax.set_ylim(-30,30)\n",
    "    ax.set_title(titles[i]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(y, gam_model.predict(X))\n",
    "plt.xlabel('actual correlation')\n",
    "plt.ylabel('predicted correlation')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "today = gini_calc.loc(axis=0)[value_date,'HY',33,'5yr',0]\n",
    "predict_HY33 = gam_model.predict(np.array(today[['gini_spread', 'duration', 'moneyness']]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "today, predict_HY33"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "deltas = []\n",
    "for s in portf.swaptions:\n",
    "    deltas.append(s.delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n",
    "start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()\n",
    "#end = (start + pd.offsets.BDay(1) * 365).date()\n",
    "end = datetime.datetime.today()\n",
    "gini_model, gini_results = {}, {}\n",
    "conn = dbconn(\"serenitasdb\")\n",
    "conn.autocommit = True\n",
    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
    "    risk = disp.get_tranche_data(dbconn(\"serenitasdb\"), index_type)\n",
    "    #gini_results[index_type], gini_model[index_type] = disp.create_separate_models(risk)\n",
    "    gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#gini_model['HY'][0].summary()\n",
    "gini_model['HY'].fit().summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gini_results['HY']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fieldlist = ['exp_percentage','dispersion','gini','tranche_loss_per','mispricing']\n",
    "for index_type in ['HY', 'IG', 'EU', 'XO']:\n",
    "    gini_results[index_type][fieldlist].to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}