Diffstat (limited to 'python/exploration')
-rw-r--r--  python/exploration/dispersion.py | 69
1 file changed, 22 insertions(+), 47 deletions(-)
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
index f4c7a076..d7fb8a73 100644
--- a/python/exploration/dispersion.py
+++ b/python/exploration/dispersion.py
@@ -16,26 +16,16 @@ def get_dispersion(index_type, series, end_date=datetime.date.today()):
     dr = pd.bdate_range(index.issue_date, end_date)
     dispersion = []
+    cumloss = []
     for d in dr:
+        print(d)
         index.value_date = d
-        dispersion.append(bn.nanstd(np.log(index.spreads())))
+        dispersion.append(index.dispersion())
+        cumloss.append(index.cumloss)
-    return pd.Series(dispersion, index=dr, name="dispersion")
-
-
-def add_cumloss(df, index_type, series, engine):
-    cumloss = pd.read_sql_query(
-        "SELECT lastdate, cumulativeloss AS cumloss FROM index_version "
-        "WHERE index=%s and series=%s order by lastdate",
-        engine,
-        params=(index_type, series),
-        parse_dates=["lastdate"],
-    )
-    cumloss.iat[-1, 0] = pd.Timestamp.max
-    cumloss = (
-        cumloss.set_index("lastdate").sort_index().reindex(df.index, method="bfill")
+    return pd.DataFrame(
+        {"dispersion": dispersion, "cumloss": cumloss}, index=dr
     )
-    return df.join(cumloss)
def get_corr_data(index_type, series, engine):
@@ -68,39 +58,26 @@ def get_corr_data(index_type, series, engine):
def get_tranche_data(index_type, engine):
- sql_string = "select * from index_version where index = %s"
- idx_ver = pd.read_sql_query(
- sql_string, engine, params=[index_type,], parse_dates=["lastdate"]
+ sql_string = (
+ "SELECT * FROM risk_numbers "
+ "LEFT JOIN index_version USING (index, series, version) "
+ "WHERE index = %s"
)
- idx_ver["date"] = pd.to_datetime(
- [
- d.strftime("%Y-%m-%d") if not pd.isnull(d) else datetime.date(2050, 1, 1)
- for d in idx_ver["lastdate"]
- ]
- )
- sql_string = "select * from risk_numbers where index = %s"
df = pd.read_sql_query(
sql_string, engine, parse_dates={"date": {"utc": True}}, params=[index_type]
)
- df["exp_percentage"] = df["expected_loss"] / df["index_expected_loss"]
+ del df["basketid"]
df.date = df.date.dt.normalize().dt.tz_convert(None)
-    df = df.groupby(["date", "index", "series", "tenor", "attach"]).mean()
-    df.reset_index(inplace=True)
-    idx_ver.sort_values(by=["date"], inplace=True, ascending=True)
-    df = pd.merge_asof(
-        df,
-        idx_ver[["date", "series", "cumulativeloss", "indexfactor"]],
-        left_on=["date"],
-        right_on=["date"],
-        by="series",
-        direction="forward",
-    )
-    df.set_index("date", inplace=True)
-    df["moneyness"] = df.apply(
-        lambda df: (df.detach - df.cumulativeloss)
-        / df.indexfactor
-        / df.index_expected_loss,
-        axis=1,
+    df = df.groupby(
+        ["date", "index", "series", "version", "tenor", "attach"], as_index=False
+    ).mean()
+    df = df.assign(
+        moneyness=lambda x: np.clip(
+            (x.detach - x.cumulativeloss) / x.indexfactor / x.index_expected_loss,
+            0.0,
+            1.0,
+        ),
+        exp_percentage=lambda x: x.expected_loss / x.index_expected_loss,
     )
     return df
@@ -119,7 +96,7 @@ def gini(array):
def get_gini_spreadstdev(index_type, series, tenor, date):
     indices = MarkitBasketIndex(index_type, series, tenor, value_date=date)
     spreads = indices.spreads()
-    spreads = spreads[spreads < 1]
+    spreads = np.ravel(spreads)
     return (gini(spreads), np.std(spreads))
@@ -130,8 +107,6 @@ if __name__ == "__main__":
     dispersion = get_dispersion(index_type, series)
     df = get_corr_data(index_type, series, serenitas_engine)
     df = df.join(dispersion)
-    if index_type == "HY":
-        df = add_cumloss(df, index_type, series, serenitas_engine)
     if index_type == "HY":
         formula = "fisher ~ np.log(dispersion) + cumloss + np.log(index_duration)"