In [None]:
import pandas as pd
import numpy as np
import datetime
import exploration.dispersion as disp
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

from analytics.basket_index import MarkitBasketIndex
from analytics import on_the_run
from statsmodels.graphics.regressionplots import plot_fit
from pygam import LinearGAM, s, f, GAM
from utils.db import dbengine

# Database engine handle for 'serenitasdb'; passed to disp.get_tranche_data below.
serenitas_engine = dbengine('serenitasdb')

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Valuation date: the business day immediately preceding today.
# pd.Timestamp.today() replaces the pd.datetime alias, which was deprecated in
# pandas 1.0 and removed in pandas 2.0.
value_date = (pd.Timestamp.today() - pd.offsets.BDay(1)).date()
# Index family analysed throughout the notebook.
index_type = 'HY'

In [None]:
# Get Gini factor: load tranche data and attach spread-dispersion measures.
risk = disp.get_tranche_data(index_type, serenitas_engine)
# Sample every 5th business day, 52*4 observations ending at value_date.
date_range = pd.bdate_range(end=value_date, freq='5B', periods=52 * 4)
# Explicit copy: the column assignment below must write to an independent
# frame, not to a slice of the frame returned by get_tranche_data.
risk = risk[risk.index.isin(date_range)].copy()
# One call per row; each result is a two-element record (gini, stdev) that is
# unpacked into two columns below. x.name is the row's index label.
temp = risk.apply(
    lambda x: disp.get_gini_spreadstdev(x['index'], x.series, [x.tenor], x.name),
    axis=1,
)
risk[['gini_spread', 'std_spread']] = pd.DataFrame(
    temp.values.tolist(),
    columns=['gini_spread', 'std_spread'],
    index=temp.index,
)

In [None]:
# Equity tranche rows (attach == 0) that fall on the sampled dates.
on_sample_dates = risk.index.isin(date_range)
is_equity = risk.attach == 0
gini_calc = risk[on_sample_dates & is_equity]

# Keep the last 5yr row per (date, series), then plot one line per series.
five_year = gini_calc[gini_calc.tenor == '5yr']
to_plot_gini = five_year.groupby(['date', 'series']).nth(-1)
to_plot_gini['gini_spread'].unstack().plot()

In [None]:
# Model the expected loss percentage (instead of correlation) for the equity
# tranche, i.e. rows with attach == 0.
gini_calc = risk[risk.attach == 0]

# Log-log OLS of exp_percentage on gini_spread, duration and moneyness.
ols_formula = (
    "np.log(exp_percentage) ~ "
    "np.log(gini_spread) + np.log(duration) + np.log(moneyness)"
)
gini_model = smf.ols(ols_formula, data=gini_calc).fit()
gini_model.summary()

In [None]:
# Plot the OLS fit against the np.log(moneyness) regressor.
plot_fit(gini_model, 'np.log(moneyness)')

In [None]:
# The model is fitted on log(exp_percentage), so exponentiate the prediction to
# get back to the original scale. assign() builds a new frame rather than
# writing a column into gini_calc, which is a filtered slice of risk.
gini_calc = gini_calc.assign(predict=np.exp(gini_model.predict(gini_calc)))

In [None]:
# Rows for the selected index family and series 33 as of value_date.
matches_index = gini_calc['index'] == index_type
matches_series = gini_calc['series'] == 33
today = gini_calc[matches_index & matches_series].loc[value_date]
today

In [None]:
# Alternative approach: fit a GAM instead of the log-log OLS.
# Only the 5yr point, for series 23 onwards, is used for modelling.
is_five_year = gini_calc.tenor == '5yr'
is_recent_series = gini_calc.series >= 23
equity = gini_calc[is_five_year & is_recent_series]
X = equity[['gini_spread', 'duration', 'moneyness']].to_numpy()
y = equity['exp_percentage'].to_numpy()

# One 5-spline smooth term per feature column of X.
spline_terms = s(0, n_splines=5) + s(1, n_splines=5) + s(2, n_splines=5)
gam_model = GAM(spline_terms)

# Fit for lambda: the same grid of 5 candidates (3**-3 .. 3**5, log-spaced)
# is supplied for each of the three terms.
lam = np.logspace(-3, 5, 5, base=3)
lams = [lam] * 3
gam_model.gridsearch(X, y, lam=lams)

gam_model.summary()

In [None]:
## plotting: partial dependence of each GAM term with its width=.95 interval
fig, axs = plt.subplots(1, 3)

titles = ['gini_spread', 'duration', 'moneyness']
for i, ax in enumerate(axs):
    XX = gam_model.generate_X_grid(term=i)
    # A single call with width set returns both the partial dependence and its
    # confidence band, so the model is evaluated once per term instead of twice.
    pdep, confi = gam_model.partial_dependence(term=i, X=XX, width=.95)
    ax.plot(XX[:, i], pdep)
    ax.plot(XX[:, i], confi, c='r', ls='--')
    if i == 0:
        # Clamp the y-range on the first panel (gini_spread) only.
        ax.set_ylim(-30, 30)
    ax.set_title(titles[i])

In [None]:
# Actual vs. GAM-predicted target. y holds equity['exp_percentage'], so the
# axes are labelled with that quantity rather than "correlation".
plt.scatter(y, gam_model.predict(X))
plt.xlabel('actual exp_percentage')
plt.ylabel('predicted exp_percentage')

In [None]:
# Rows on the most recent date in the modelling sample. Selecting with a
# one-element list keeps the result a DataFrame even when only one row matches.
today = equity.loc[[equity.index.max()]]

# Compare actual and predicted values for the SAME series (33); previously the
# actual value was taken from series 31 while the prediction was for series 33.
hy33_today = today[today.series == 33]
predict_HY33 = gam_model.predict(
    np.array(hy33_today[['gini_spread', 'duration', 'moneyness']])
)
hy33_today[['exp_percentage']], predict_HY33