diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/exploration/dispersion.py | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py new file mode 100644 index 00000000..01f29a89 --- /dev/null +++ b/python/exploration/dispersion.py @@ -0,0 +1,83 @@ +import datetime +import numpy as np +import pandas as pd +import statsmodels.api as sm +import statsmodels.formula.api as smf + +from analytics.basket_index import MarkitBasketIndex +from analytics import CreditIndex +from dateutil.relativedelta import relativedelta +from utils.db import dbengine + + +def get_dispersion(index_type, series, end_date=datetime.date.today()): + index = MarkitBasketIndex(index_type, series, ["5yr"]) + dr = pd.bdate_range(index.issue_date, end_date) + + dispersion = [] + for d in dr: + index.value_date = d + dispersion.append(np.log(index.spreads()).std()) + + return pd.Series(dispersion, index=dr, name="dispersion") + + +def add_cumloss(df, index_type, series, engine): + cumloss = pd.read_sql_query( + "SELECT lastdate, cumulativeloss AS cumloss FROM index_version " + "WHERE index=%s and series=%s order by lastdate", + engine, + params=(index_type, series), + parse_dates=["lastdate"], + ) + cumloss.iat[-1, 0] = pd.Timestamp.max + cumloss = ( + cumloss.set_index("lastdate").sort_index().reindex(df.index, method="bfill") + ) + return df.join(cumloss) + + +def get_corr_data(index_type, series, engine): + sql_str = ( + "SELECT quotedate::date, indexrefspread, indexrefprice, index_duration, " + "index_expected_loss, corr_at_detach " + "FROM tranche_risk JOIN tranche_quotes " + "ON tranche_risk.tranche_id=tranche_quotes.id " + "WHERE index=%s and series=%s and tenor='5yr' and detach=%s order by quotedate desc" + ) + df = pd.read_sql_query( + sql_str, + engine, + params=(index_type, series, 3 if index_type == "IG" else 15), + index_col=["quotedate"], + parse_dates=["quotedate"], + ) + if index_type == "HY": + spread_equivalent = [] + index = CreditIndex(index_type, series, "5yr") + for k, v in df.iterrows(): + index.value_date = k + index.ref = v["indexrefprice"] + spread_equivalent.append(index.spread) + df["indexrefspread"] = spread_equivalent + df = df.assign( + fisher=lambda x: 0.5 * np.log((1 + x.corr_at_detach) / (1 - x.corr_at_detach)) + ) + return df + + +if __name__ == "__main__": + index_type = "HY" + series = 29 + serenitas_engine = dbengine("serenitasdb") + dispersion = get_dispersion(index_type, series) + df = get_corr_data(index_type, series, serenitas_engine) + df = df.join(dispersion) + if index_type == "HY": + df = add_cumloss(df, index_type, series, serenitas_engine) + + if index_type == "HY": + formula = "fisher ~ np.log(dispersion) + cumloss + np.log(index_duration)" + else: + formula = "fisher ~ np.log(dispersion) + np.log(indexrefspread) + np.log(index_duration)" + mod = smf.ols(formula=formula, data=df) |
