In [None]:
import pandas as pd
import numpy as np
import datetime
import exploration.dispersion as disp
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

from serenitas.analytics.basket_index import MarkitBasketIndex
from serenitas.analytics.base import Trade
from statsmodels.graphics.regressionplots import plot_fit
from pygam import LinearGAM, s, f, GAM
from serenitas.utils.db import dbengine

# Database handle obtained from dbengine for 'serenitasdb'; it is passed to every
# pd.read_sql_query call further down in this notebook.
serenitas_engine = dbengine('serenitasdb')

In [None]:
# IPython line magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Notebook parameters: which index/series to analyse, and the valuation date,
# taken as one business day before today's calendar date.
index_type, series = 'IG', 37
value_date = (datetime.date.today() - pd.offsets.BDay(1)).date()

In [None]:
# Load the constituents of the current series (df) and of the series
# `series_back` rolls earlier (df1), then attach to each constituent its
# default probability, computed as 1 - survival probability at the 5yr tenor.
series_back = 2
# index_members is called with (index name, date); the columns it returns are
# not visible from this notebook, apart from 'markit_ticker' used as the index.
sql_string = "select * from index_members(%s, %s)"

df = pd.read_sql_query(
    sql_string,
    serenitas_engine,
    params=(index_type + str(series), value_date),
    index_col=['markit_ticker'],
)
df1 = pd.read_sql_query(
    sql_string,
    serenitas_engine,
    params=(index_type + str(series - series_back), value_date),
    index_col=['markit_ticker'],
)

# NOTE: the loop variable is deliberately NOT called `s`. `s` is the pygam
# spline-term constructor imported at the top of the notebook; rebinding it to
# an int here makes the GAM cell fail with "'int' object is not callable"
# when the notebook is run top to bottom.
default_prob = {}
for member_series in [series, series - series_back]:
    index = MarkitBasketIndex(index_type, member_series, ['5yr'])
    surv_prob, tickers = index.survival_matrix()
    # survival_matrix is flattened to one value per ticker (single tenor requested).
    default_prob[member_series] = pd.Series(1 - np.ravel(surv_prob), index=tickers)
# Stack both series into one Series indexed by (series, markit_ticker).
default_prob = pd.concat(default_prob, names=['series', 'markit_ticker'])
default_prob.name = 'default_prob'

# Inner-join on ticker: adds a 'default_prob' column to each member table.
df = df.merge(default_prob.loc[series], left_index=True, right_index=True)
df1 = df1.merge(default_prob.loc[series - series_back], left_index=True, right_index=True)

In [None]:
# Removals: tickers that are in the older series (df1) but not in the current one (df).
removed_tickers = df1.index.difference(df.index)
df1.loc[removed_tickers]

In [None]:
# Additions: tickers that are in the current series (df) but not in the older one (df1).
added_tickers = df.index.difference(df1.index)
df.loc[added_tickers]

In [None]:
# Ten constituents of the current series with the highest default probability,
# shown together with the index type and series number.
(index_type, series, df.nlargest(n=10, columns='default_prob'))

In [None]:
# Same view for the older series: ten constituents with the highest default probability.
(index_type, series - series_back, df1.nlargest(n=10, columns='default_prob'))

In [None]:
# History of per-ticker default probabilities for the current series:
# 12 dates at business-month-end frequency, ending at value_date.
date_range = pd.bdate_range(end=value_date, freq='1BM', periods=12)
index = MarkitBasketIndex(index_type, series, ['5yr'])

prob_by_date = {}
for d in date_range:
    # Re-point the basket at each date before asking for the survival matrix.
    index.value_date = d
    surv_prob, tickers = index.survival_matrix()
    prob_by_date[d] = pd.Series(1 - np.ravel(surv_prob), index=tickers)

# One Series keyed by (date, ticker).
default_prob = pd.concat(prob_by_date)

In [None]:
# Plot the history of the names with the highest default probability.
# The variable is called top20, but the cut-off actually applied is the 25
# largest values on the most recent date (last row after unstacking).
top20 = default_prob.unstack(-1)
latest_row = top20.iloc[-1]
top20 = top20.loc[:, latest_row.nlargest(25).index]
top20 = top20.rename_axis(index='date', columns='tickers')

plot_title = f'market implied default probabilities to {index.maturities[0]}'
ax = top20.plot(title=plot_title, figsize=(10,6))
# Anchor the legend to the right of the axes so it does not cover the lines.
ax.legend(loc='upper center', bbox_to_anchor=(1.3, 1), ncol=1)
ax.set_xlabel('date')
ax.set_ylabel('probability')
plt.tight_layout()

In [None]:
# Default probabilities (1 - survival probability) per ticker across several
# tenors, valued as of today's calendar date rather than value_date.
# To look at another index, override index_type / series before this point
# (e.g. index_type = 'IG', series = 26).
tenors = ['3yr', '5yr', '7yr', '10yr']
indices = MarkitBasketIndex(index_type, series, tenors)
indices.value_date = datetime.date.today()

today_surv_prob, tickers = indices.survival_matrix()
today_default_prob = pd.DataFrame(
    data=1 - today_surv_prob,
    index=tickers,
    columns=tenors,
)

In [None]:
####################### NAV Basis

#            HY              |           IG
#+ve index trades risk rich  | index trades risk cheap
#-ve single trades risk rich | single trades risk cheap

# NOTE: this cell rebinds `df` (the index-member table built earlier) to the
# 5yr index quotes; the following cells read the quotes through `df`.
sql_string = "select * from index_quotes where index = %s and tenor = '5yr'"
df = pd.read_sql_query(sql_string, serenitas_engine, params=(index_type,), index_col=['date'])

# For every date: distance of each row's series from the highest series quoted that day.
df["dist_on_the_run"] = df.groupby("date")["series"].transform(
    lambda x: x.max() - x
)

# Take the last version: keep the last row of every (date, series) pair.
# groupby(...).nth(-1) is avoided on purpose: from pandas 2.0 it behaves as a
# filter and no longer moves the group keys into the index, which makes the
# reset_index('series') below raise KeyError. De-duplicating on an explicit
# (date, series) index gives the same sorted result on every pandas version.
# NOTE(review): "last" means last in the order returned by the query, which
# has no ORDER BY - confirm that this ordering is reliable.
df = df.set_index('series', append=True)
df = df[~df.index.duplicated(keep='last')].sort_index()

# Basis = quoted minus model level; spread-based for IG, price-based otherwise.
if index_type == 'IG':
    df['basis'] = df.closespread - df.modelspread
else:
    df['basis'] = df.closeprice - df.modelprice

# Re-index by (date, dist_on_the_run) and bring 'series' back as a column.
df = df.set_index('dist_on_the_run', append=True).reset_index('series')

# Wide table: one row per date, one column per dist_on_the_run.
basis = df['basis'].unstack()
stats = pd.DataFrame(
    [
        basis.min(),
        basis.mean(),
        basis.max(),
        basis.quantile(.01),
        basis.quantile(.05),
        basis.quantile(.95),
        basis.quantile(.99),
    ],
    index=['min', 'mean', 'max',
           '1%tile', '5%tile', '95%tile', '99%tile'],
)
stats

In [None]:
####################### Regression of NAV basis on duration / spread / gini
# An OLS of basis on log(duration), log(closespread) and log(gini_spread) was the
# earlier idea:
#   smf.ols("basis ~ np.log(duration) + np.log(closespread) + np.log(gini_spread)", data=df_gini_calc).fit()
# A GAM is fitted instead.

# Complete rows only, from 2019-01-01 onwards, with dist_on_the_run moved out of the index.
df_gini_calc = (
    df.dropna()
    .loc[datetime.date(2019,1,1):, :]
    .reset_index('dist_on_the_run')
    [['index','series', 'tenor', 'duration', 'basis', 'closespread', 'gini']]
)

# Feature matrix (columns 0, 1, 2 = duration, closespread, gini) and 2-D target column.
X = df_gini_calc[['duration', 'closespread', 'gini']].to_numpy()
y = df_gini_calc[['basis']].to_numpy()

# One concave-constrained spline term per feature column.
duration_term = s(0, constraints='concave')
spread_term = s(1, constraints='concave')
gini_term = s(2, constraints='concave')
basis_model = GAM(duration_term + spread_term + gini_term)

# Same grid of 5 log-spaced penalties (1e-3 .. 1e5) for each of the three terms.
lam = np.logspace(-3, 5, 5, base=10)
lams = [lam for _ in range(3)]

basis_model.gridsearch(X, y, lam=lams)
basis_model.summary()

In [None]:
## Partial-dependence plot of each GAM term, one panel per feature
fig, axs = plt.subplots(1, 3)

titles = ['duration', 'closespread', 'gini_spread']
for term_idx, (ax, panel_title) in enumerate(zip(axs, titles)):
    grid = basis_model.generate_X_grid(term=term_idx)
    feature_values = grid[:, term_idx]
    # Fitted partial dependence for this term ...
    ax.plot(feature_values, basis_model.partial_dependence(term=term_idx, X=grid))
    # ... and the second element returned when width=.95 is requested, drawn dashed red.
    ax.plot(feature_values,
            basis_model.partial_dependence(term=term_idx, X=grid, width=.95)[1],
            c='r', ls='--')
    ax.set_title(panel_title)

# Only the first panel (duration) gets a fixed y-range.
axs[0].set_ylim(-30, 30);

In [None]:
############## In-sample fit: model prediction against the observed basis
predict = basis_model.predict(X)

plt.scatter(x=y, y=predict)
plt.xlabel('actual basis')
plt.ylabel('predicted basis')

In [None]:
############## today's basis
# Actual vs predicted basis for the last row of the regression sample.
# NOTE(review): this is "today's" value only if the last row of df_gini_calc is
# the most recent date - presumably true given the date-sorted index; confirm.
y[-1], predict[-1]