diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/exploration/dispersion.py | 69 |
1 files changed, 22 insertions, 47 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py index f4c7a076..d7fb8a73 100644 --- a/python/exploration/dispersion.py +++ b/python/exploration/dispersion.py @@ -16,26 +16,16 @@ def get_dispersion(index_type, series, end_date=datetime.date.today()): dr = pd.bdate_range(index.issue_date, end_date) dispersion = [] + cumloss = [] for d in dr: + print(d) index.value_date = d - dispersion.append(bn.nanstd(np.log(index.spreads()))) + dispersion.append(index.dispersion()) + cumloss.append(index.cumloss) - return pd.Series(dispersion, index=dr, name="dispersion") - - -def add_cumloss(df, index_type, series, engine): - cumloss = pd.read_sql_query( - "SELECT lastdate, cumulativeloss AS cumloss FROM index_version " - "WHERE index=%s and series=%s order by lastdate", - engine, - params=(index_type, series), - parse_dates=["lastdate"], - ) - cumloss.iat[-1, 0] = pd.Timestamp.max - cumloss = ( - cumloss.set_index("lastdate").sort_index().reindex(df.index, method="bfill") + return pd.DataFrame( + {"dispersion": dispersion, "cumloss": cumloss,}, index=dr, name="dispersion", ) - return df.join(cumloss) def get_corr_data(index_type, series, engine): @@ -68,39 +58,26 @@ def get_corr_data(index_type, series, engine): def get_tranche_data(index_type, engine): - sql_string = "select * from index_version where index = %s" - idx_ver = pd.read_sql_query( - sql_string, engine, params=[index_type,], parse_dates=["lastdate"] + sql_string = ( + "SELECT * FROM risk_numbers " + "LEFT JOIN index_version USING (index, series, version) " + "WHERE index = %s" ) - idx_ver["date"] = pd.to_datetime( - [ - d.strftime("%Y-%m-%d") if not pd.isnull(d) else datetime.date(2050, 1, 1) - for d in idx_ver["lastdate"] - ] - ) - sql_string = "select * from risk_numbers where index = %s" df = pd.read_sql_query( sql_string, engine, parse_dates={"date": {"utc": True}}, params=[index_type] ) - df["exp_percentage"] = df["expected_loss"] / df["index_expected_loss"] + del df["basketid"] df.date = df.date.dt.normalize().dt.tz_convert(None) - df = df.groupby(["date", "index", "series", "tenor", "attach"]).mean() - df.reset_index(inplace=True) - idx_ver.sort_values(by=["date"], inplace=True, ascending=True) - df = pd.merge_asof( - df, - idx_ver[["date", "series", "cumulativeloss", "indexfactor"]], - left_on=["date"], - right_on=["date"], - by="series", - direction="forward", - ) - df.set_index("date", inplace=True) - df["moneyness"] = df.apply( - lambda df: (df.detach - df.cumulativeloss) - / df.indexfactor - / df.index_expected_loss, - axis=1, + df = df.groupby( + ["date", "index", "series", "version", "tenor", "attach"], as_index=False + ).mean() + df = df.assign( + moneyness=lambda x: np.clip( + (x.detach - x.cumulativeloss) / x.indexfactor / x.index_expected_loss, + 0.0, + 1.0, + ), + exp_percentage=lambda x: x.expected_loss / x.index_expected_loss, ) return df @@ -119,7 +96,7 @@ def gini(array): def get_gini_spreadstdev(index_type, series, tenor, date): indices = MarkitBasketIndex(index_type, series, tenor, value_date=date) spreads = indices.spreads() - spreads = spreads[spreads < 1] + spreads = np.ravel(spreads) return (gini(spreads), np.std(spreads)) @@ -130,8 +107,6 @@ if __name__ == "__main__": dispersion = get_dispersion(index_type, series) df = get_corr_data(index_type, series, serenitas_engine) df = df.join(dispersion) - if index_type == "HY": - df = add_cumloss(df, index_type, series, serenitas_engine) if index_type == "HY": formula = "fisher ~ np.log(dispersion) + cumloss + np.log(index_duration)" |
