diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/exploration/dispersion.py | 76 | ||||
| -rw-r--r-- | python/notebooks/Dispersion.ipynb | 25 |
2 files changed, 54 insertions, 47 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py index 575877ba..f0c5e5e8 100644 --- a/python/exploration/dispersion.py +++ b/python/exploration/dispersion.py @@ -11,21 +11,20 @@ from dateutil.relativedelta import relativedelta from utils.db import dbengine -def get_dispersion(index_type, series, end_date=datetime.date.today()): +def get_dispersion(index_type, series, use_gini=False, use_log=True, dr=None): index = MarkitBasketIndex(index_type, series, ["5yr"]) - dr = pd.bdate_range(index.issue_date, end_date) + if dr is None: + dr = pd.bdate_range( + index.issue_date, datetime.datetime.today() - pd.offsets.BDay(1) + ) dispersion = [] - cumloss = [] for d in dr: print(d) index.value_date = d - dispersion.append(index.dispersion()) - cumloss.append(index.cumloss) + dispersion.append(index.dispersion(use_gini, use_log)) - return pd.DataFrame( - {"dispersion": dispersion, "cumloss": cumloss,}, index=dr, name="dispersion", - ) + return pd.DataFrame(dispersion, index=dr, columns=["dispersion"]) def get_corr_data(index_type, series, engine): @@ -72,28 +71,51 @@ def get_tranche_data(index_type, engine): ["date", "index", "series", "version", "tenor", "attach"], as_index=False ).mean() df = df.assign( - moneyness=lambda x: np.clip( - (x.detach - x.cumulativeloss) / x.indexfactor / x.index_expected_loss, - 0.0, - 1.0, - ), exp_percentage=lambda x: x.expected_loss / x.index_expected_loss, + attach_adj=lambda x: np.maximum( + (x.attach - x.cumulativeloss) / df.indexfactor, 0 + ), + detach_adj=lambda x: np.minimum( + (x.detach - x.cumulativeloss) / df.indexfactor, 1 + ), + ) + df = df.assign( + moneyness=lambda x: (x.detach_adj + x.attach_adj) + / 2 + / x.indexfactor + / x.index_expected_loss, ) - df.set_index(["index", "series", "tenor", "attach"], append=True, inplace=True) + df.set_index( + ["date", "index", "series", "tenor", "attach"], append=True, inplace=True + ) + df.reset_index(level=0, drop=True, inplace=True) return df -def create_gini_models(df): +def create_models(df, use_gini=False, use_log=True): # Takes the output of get_tranche_data + dispersion = {} + for g, _ in df.groupby(["series", "index"]): + temp = df.xs(g[0], level="series") + date_range = temp.index.get_level_values("date").unique() + dispersion[g[0]] = get_dispersion( + g[1], g[0], use_gini=use_gini, use_log=use_log, dr=date_range + ) + dispersion = pd.concat(dispersion) + dispersion.index.rename("series", level=0, inplace=True) + df = df.merge(dispersion, left_index=True, right_index=True) + df.dropna(subset=["dispersion"], inplace=True) gini_model, gini_calc = {}, {} for attach in df.index.get_level_values("attach").unique(): - gini_calc[attach] = df.loc(axis=0)[:, :, :, "5yr", attach] + gini_calc[attach] = df.xs( + ["5yr", attach], level=["tenor", "attach"], drop_level=False + ) gini_model[attach] = smf.ols( "np.log(exp_percentage) ~ " - "np.log(gini_spread) + " + "dispersion + " "np.log(index_duration) + " "np.log(moneyness)", - data=gini_calc[attach], + data=df.xs(attach, level="attach"), ).fit() gini_calc[attach]["predict"] = np.exp( gini_model[attach].predict(gini_calc[attach]) @@ -116,24 +138,6 @@ def create_gini_models(df): return gini_model, gini_calc -def gini(array): - """Calculate the Gini coefficient of a numpy array.""" - if np.amin(array) < 0: - array -= np.amin(array) # values cannot be negative - array += 0.0000001 # values cannot be 0 - array = np.sort(array) # values must be sorted - index = np.arange(1, array.shape[0] + 1) # index per array element - n = array.shape[0] # number of array elements - return (np.sum((2 * index - n - 1) * array)) / (n * np.sum(array)) - - -def get_gini_spreadstdev(index_type, series, tenor, date): - indices = MarkitBasketIndex(index_type, series, tenor, value_date=date) - spreads = indices.spreads() - spreads = np.ravel(spreads) - return (gini(spreads), np.std(spreads)) - - if __name__ == "__main__": index_type = "HY" series = 29 diff --git a/python/notebooks/Dispersion.ipynb b/python/notebooks/Dispersion.ipynb index 59e98647..84701ba8 100644 --- a/python/notebooks/Dispersion.ipynb +++ b/python/notebooks/Dispersion.ipynb @@ -37,7 +37,9 @@ "metadata": {}, "outputs": [], "source": [ - "value_date = (pd.datetime.today() - pd.offsets.BDay(1)).date()\n", + "value_date = (datetime.datetime.today() - pd.offsets.BDay(1)).date()\n", + "start_date = datetime.date(2019,9,27)\n", + "end_date = datetime.date(2020,1,30)\n", "index_type = 'HY'" ] }, @@ -48,9 +50,10 @@ "outputs": [], "source": [ "#Get Gini factor\n", - "date_range = pd.bdate_range(end=value_date, freq='5B',periods=52*.5)\n", - "risk = disp.get_tranche_data(index_type, date_range, serenitas_engine)\n", - "gini_model, gini_calc = disp.create_gini_models(risk)" + "date_range = pd.bdate_range(end=value_date, freq='1B',periods=52*4)\n", + "risk = disp.get_tranche_data(index_type, serenitas_engine)\n", + "risk = risk[risk.index.get_level_values(0).isin(date_range)]\n", + "gini_model, gini_calc = disp.create_models(risk, use_gini=True, use_log=False)" ] }, { @@ -59,8 +62,8 @@ "metadata": {}, "outputs": [], "source": [ - "to_plot_gini = gini_calc.loc(axis=0)[:,:,:,'5yr',0].groupby(['date', 'series']).nth(-1)\n", - "to_plot_gini['gini_spread'].unstack().plot()" + "to_plot_gini = gini_calc.xs(0, level='attach').groupby(['date', 'series']).nth(-1)\n", + "to_plot_gini['dispersion'].unstack().plot()" ] }, { @@ -69,7 +72,7 @@ "metadata": {}, "outputs": [], "source": [ - "today = gini_calc.loc(axis=0)[value_date,:,33,'5yr',:]\n", + "today = gini_calc.xs([value_date,33], level=['date','series'])\n", "today[['exp_percentage', 'predict_N', 'predict_preN', 'mispricing']]" ] }, @@ -79,8 +82,8 @@ "metadata": {}, "outputs": [], "source": [ - "to_plot = gini_calc.loc(axis=0)[:,:,:,'5yr',0]['mispricing']\n", - "to_plot.reset_index(['index','tenor','attach'], drop=True).unstack().plot()" + "to_plot = gini_calc.xs(0, level='attach')['mispricing']\n", + "to_plot.reset_index(['index','tenor'], drop=True).unstack().plot()" ] }, { @@ -99,8 +102,8 @@ "outputs": [], "source": [ "#Run a particular gini scenario\n", - "scenario = gini_calc.loc(axis=0)[value_date,'HY',33,'5yr',0]\n", - "scenario['gini_spread'] = .6\n", + "scenario = gini_calc.loc(axis=0)[value_date,33,'HY','5yr',0]\n", + "scenario['dispersion'] = .6\n", "scenario_disp = np.exp(gini_model[0].predict(scenario))\n", "mispricing = (scenario['exp_percentage'] - scenario_disp) * \\\n", " scenario['index_expected_loss'] / \\\n", |
