In [None]:
import pandas as pd
import numpy as np
import itertools
import datetime
import exploration.dispersion as disp
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import analytics.tranche_data as tdata

from analytics.basket_index import MarkitBasketIndex
from analytics import on_the_run
from statsmodels.graphics.regressionplots import plot_fit
from scipy.special import logit, expit
from pygam import LinearGAM, s, f, GAM
from utils.db import dbengine, dbconn

In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [None]:
# Fit the dispersion ("gini") model for every index family in two passes:
#   1. a first fit through disp.create_models_v2 without weights;
#   2. a refit where each observation is weighted by the inverse squared
#      residual measured after mapping through expit.
# Results and models are stored per index family in `gini_results` / `gini_model`.
value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()
# Look-back of 4 * 365 business days from today.
start = (datetime.datetime.today() - pd.offsets.BDay(1) * 365 *4).date()
end = datetime.datetime.today()
gini_model, gini_results = {}, {}
conn = dbconn("serenitasdb")
conn.autocommit = True
for index_type in ['HY', 'IG', 'EU', 'XO']:
    # Reuse the connection opened above rather than opening (and never
    # closing) a fresh one on every iteration.
    risk = disp.get_tranche_data(conn, index_type)
    risk = risk[risk.index_duration > .5] #filter out the short duration ones
    gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk)
    fitted = gini_model[index_type].fit()
    # fittedvalues + resid is presumably the observed response on the model's
    # scale (TODO confirm against disp.create_models_v2); the weight is the
    # inverse squared gap between observed and fitted after applying expit.
    w = 1/(expit(fitted.fittedvalues + fitted.resid) -expit(fitted.fittedvalues))**2
    # Third positional argument: the weights computed above.
    gini_results[index_type], gini_model[index_type] = disp.create_models_v2(conn, risk, w)
gini_model['HY'].fit().summary()

In [None]:
# Write the selected model columns to one CSV file per index family.
fieldlist = [
    'exp_percentage',
    'dispersion',
    'gini',
    'tranche_loss_per',
    'mispricing',
]
for index_type in ('HY', 'IG', 'EU', 'XO'):
    results_subset = gini_results[index_type][fieldlist]
    results_subset.to_csv('/home/serenitas/edwin/' + index_type + '_results.csv')

In [None]:
#Run a particular gini scenario: override the gini of the first selected row
#and compare the model-implied value with the observed tranche_loss_per.
# Take an explicit copy so the override below edits an independent frame and
# can never be lost (or leak back into gini_results) through view/copy ambiguity.
scenario = gini_results['HY'].loc(axis=0)[value_date,'HY',33,:,'5yr',0].copy()
# One positional assignment on the frame itself; the chained form
# `scenario['gini'].iloc[0] = ...` warns and is a no-op under Copy-on-Write.
scenario.iloc[0, scenario.columns.get_loc('gini')] = .7
# expit maps the model prediction through the logistic function.
scenario_disp = expit(gini_model['HY'].fit().predict(scenario))
mispricing = scenario['tranche_loss_per'] - scenario_disp
mispricing

In [None]:
# Residual diagnostics for the HY model: expit-transformed fitted values on the
# x-axis against the gap between expit(fitted + resid) and expit(fitted).
fitted = gini_model['HY'].fit()
fitted_level = expit(fitted.fittedvalues)
level_resid = expit(fitted.fittedvalues + fitted.resid) - fitted_level

plt.figure(figsize=(8, 5))
p = plt.scatter(x=fitted_level, y=level_resid, edgecolor='k')

# Zero reference line, drawn slightly beyond the range of the fitted values.
xmin = min(fitted_level)
xmax = max(fitted_level)
plt.hlines(y=0, xmin=xmin * 0.9, xmax=xmax * 1.1, color='red', linestyle='--', lw=3)

plt.xlabel("Fitted values", fontsize=15)
plt.ylabel("Residuals", fontsize=15)
plt.title("Fitted vs. residuals plot", fontsize=18)
plt.grid(True)
plt.show()

In [None]:
# Fit a single-index model with disp.create_models on a date-restricted sample.
# NOTE(review): this rebinds `gini_model` from the per-index dict created in the
# first model cell to a single model object, so cells that index
# gini_model['HY'] fail if re-run after this one. Consider a distinct name.
one_bday = pd.offsets.BDay(1)
value_date = (datetime.datetime.today() - one_bday).date()
# Training window: the last 4 * 365 business days up to now.
start = (datetime.datetime.today() - one_bday * 365 * 4).date()
# Disabled alternative: end = (start + pd.offsets.BDay(1) * 365).date()
end = datetime.datetime.today()

index_type = 'IG'
serenitasconn = dbconn("serenitasdb")
serenitasconn.autocommit = True

risk = disp.get_tranche_data(serenitasconn, index_type)
train_data = risk[start:end]
gini_calc, gini_model = disp.create_models(serenitasconn, train_data)

single_index_fit = gini_model.fit()
single_index_fit.summary()

In [None]:
# Compare against realised delta-hedged returns: build a cumulative return
# path per series for the chosen attachment point, plot it, then dump the
# full returns table to CSV.
tranche_quotes = tdata.get_tranche_quotes(index=index_type)
tranche_returns = tdata.tranche_returns(df=tranche_quotes)
attach = 0

# Drop the 'index' and 'tenor' levels, then keep only the chosen attachment.
hedged = tranche_returns['delhedged_return'].reset_index(['index', 'tenor'], drop=True)
t = hedged.xs(attach, level='attach')

# Compound (1 + return) within each series, ignoring missing observations.
temp = {
    series: (series_returns.dropna() + 1).cumprod()
    for series, series_returns in t.groupby('series')
}

# concat adds the dict key as an outer index level; drop it again.
t = pd.concat(temp).reset_index(0, drop=True)
t.unstack(level='series').plot()
tranche_returns.to_csv('/home/serenitas/edwin/Python/temp3.csv')

In [None]:
# Regress realised delta-hedged returns on the model's mispricing signal for
# one tranche (series 29, 5yr tenor, attachment `attach`).
attach = 0

# Select on `index_type` rather than a hard-coded 'HY': tranche_returns and
# gini_calc were both loaded for `index_type`, so a literal that disagrees
# with it presumably finds no matching rows (TODO confirm with the data owner).
selection_key = (index_type, 29, '5yr', attach)
selection_levels = ['index', 'series', 'tenor', 'attach']

returns = tranche_returns.xs(selection_key, level=selection_levels)['delhedged_return']
model = gini_calc.xs(selection_key, level=selection_levels)['mispricing']
# Join on the remaining index so each return lines up with its mispricing.
returns = pd.merge(returns, model, left_index=True, right_index=True)
model_verification = smf.ols("delhedged_return ~ mispricing ", data=returns).fit()
# Show the regression table; previously the fit was computed but never displayed.
model_verification.summary()

In [None]:
# Export the series-29 returns with one column group per attachment point.
series_29_returns = tranche_returns.xs(29, level='series')
series_29_returns.unstack(level='attach').to_csv('/home/serenitas/edwin/Python/temp1.csv')