aboutsummaryrefslogtreecommitdiffstats
path: root/python/exploration/dispersion.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/exploration/dispersion.py')
-rw-r--r--python/exploration/dispersion.py76
1 files changed, 40 insertions, 36 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
index 575877ba..f0c5e5e8 100644
--- a/python/exploration/dispersion.py
+++ b/python/exploration/dispersion.py
@@ -11,21 +11,20 @@ from dateutil.relativedelta import relativedelta
from utils.db import dbengine
-def get_dispersion(index_type, series, end_date=datetime.date.today()):
+def get_dispersion(index_type, series, use_gini=False, use_log=True, dr=None):
index = MarkitBasketIndex(index_type, series, ["5yr"])
- dr = pd.bdate_range(index.issue_date, end_date)
+ if dr is None:
+ dr = pd.bdate_range(
+ index.issue_date, datetime.datetime.today() - pd.offsets.BDay(1)
+ )
dispersion = []
- cumloss = []
for d in dr:
print(d)
index.value_date = d
- dispersion.append(index.dispersion())
- cumloss.append(index.cumloss)
+ dispersion.append(index.dispersion(use_gini, use_log))
- return pd.DataFrame(
- {"dispersion": dispersion, "cumloss": cumloss,}, index=dr, name="dispersion",
- )
+ return pd.DataFrame(dispersion, index=dr, columns=["dispersion"])
def get_corr_data(index_type, series, engine):
@@ -72,28 +71,51 @@ def get_tranche_data(index_type, engine):
["date", "index", "series", "version", "tenor", "attach"], as_index=False
).mean()
df = df.assign(
- moneyness=lambda x: np.clip(
- (x.detach - x.cumulativeloss) / x.indexfactor / x.index_expected_loss,
- 0.0,
- 1.0,
- ),
exp_percentage=lambda x: x.expected_loss / x.index_expected_loss,
+ attach_adj=lambda x: np.maximum(
+ (x.attach - x.cumulativeloss) / df.indexfactor, 0
+ ),
+ detach_adj=lambda x: np.minimum(
+ (x.detach - x.cumulativeloss) / df.indexfactor, 1
+ ),
+ )
+ df = df.assign(
+ moneyness=lambda x: (x.detach_adj + x.attach_adj)
+ / 2
+ / x.indexfactor
+ / x.index_expected_loss,
)
- df.set_index(["index", "series", "tenor", "attach"], append=True, inplace=True)
+ df.set_index(
+ ["date", "index", "series", "tenor", "attach"], append=True, inplace=True
+ )
+ df.reset_index(level=0, drop=True, inplace=True)
return df
-def create_gini_models(df):
+def create_models(df, use_gini=False, use_log=True):
# Takes the output of get_tranche_data
+ dispersion = {}
+ for g, _ in df.groupby(["series", "index"]):
+ temp = df.xs(g[0], level="series")
+ date_range = temp.index.get_level_values("date").unique()
+ dispersion[g[0]] = get_dispersion(
+ g[1], g[0], use_gini=use_gini, use_log=use_log, dr=date_range
+ )
+ dispersion = pd.concat(dispersion)
+ dispersion.index.rename("series", level=0, inplace=True)
+ df = df.merge(dispersion, left_index=True, right_index=True)
+ df.dropna(subset=["dispersion"], inplace=True)
gini_model, gini_calc = {}, {}
for attach in df.index.get_level_values("attach").unique():
- gini_calc[attach] = df.loc(axis=0)[:, :, :, "5yr", attach]
+ gini_calc[attach] = df.xs(
+ ["5yr", attach], level=["tenor", "attach"], drop_level=False
+ )
gini_model[attach] = smf.ols(
"np.log(exp_percentage) ~ "
- "np.log(gini_spread) + "
+ "dispersion + "
"np.log(index_duration) + "
"np.log(moneyness)",
- data=gini_calc[attach],
+ data=df.xs(attach, level="attach"),
).fit()
gini_calc[attach]["predict"] = np.exp(
gini_model[attach].predict(gini_calc[attach])
@@ -116,24 +138,6 @@ def create_gini_models(df):
return gini_model, gini_calc
-def gini(array):
- """Calculate the Gini coefficient of a numpy array."""
- if np.amin(array) < 0:
- array -= np.amin(array) # values cannot be negative
- array += 0.0000001 # values cannot be 0
- array = np.sort(array) # values must be sorted
- index = np.arange(1, array.shape[0] + 1) # index per array element
- n = array.shape[0] # number of array elements
- return (np.sum((2 * index - n - 1) * array)) / (n * np.sum(array))
-
-
-def get_gini_spreadstdev(index_type, series, tenor, date):
- indices = MarkitBasketIndex(index_type, series, tenor, value_date=date)
- spreads = indices.spreads()
- spreads = np.ravel(spreads)
- return (gini(spreads), np.std(spreads))
-
-
if __name__ == "__main__":
index_type = "HY"
series = 29