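"""Regression models for credit index tranches.

Builds two OLS models from the tranche database: a Fisher-transformed
base-correlation model (fit in the __main__ block below) and a
cross-sectional model of each tranche's share of the index expected
loss (create_models).
"""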
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from analytics import CreditIndex
# logit looks unused but is evaluated inside the patsy formula string in
# create_models; expit inverts the transform after prediction.
from scipy.special import logit, expit
from utils.db import dbengine


def get_corr_data(index_type, series, engine):
    """Pull 5yr equity-tranche quotes and risk measures for one index series.

    The equity tranche detaches at 3% for IG and at 15% for HY.
    """
    sql_str = (
        "SELECT quotedate::date, indexrefspread, indexrefprice, index_duration, "
        "index_expected_loss, corr_at_detach "
        "FROM tranche_risk JOIN tranche_quotes "
        "ON tranche_risk.tranche_id=tranche_quotes.id "
        "WHERE index=%s AND series=%s AND tenor='5yr' AND detach=%s "
        "ORDER BY quotedate DESC"
    )
    df = pd.read_sql_query(
        sql_str,
        engine,
        params=(index_type, series, 3 if index_type == "IG" else 15),
        index_col=["quotedate"],
        parse_dates=["quotedate"],
    )
    if index_type == "HY":
        # HY quotes are in price terms; convert each day's reference price
        # to a spread equivalent through the index model.
        spread_equivalent = []
        index = CreditIndex(index_type, series, "5yr")
        for k, v in df.iterrows():
            index.value_date = k
            index.ref = v["indexrefprice"]
            spread_equivalent.append(index.spread)
        df["indexrefspread"] = spread_equivalent
    # Fisher z-transform (arctanh) of the detachment correlation: maps
    # (-1, 1) onto the real line so the OLS residuals are better behaved.
    df = df.assign(
        fisher=lambda x: 0.5 * np.log((1 + x.corr_at_detach) / (1 - x.corr_at_detach))
    )
    return df
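

# Note: a model fit on `fisher` predicts in transformed space; np.tanh is the
# inverse map back to correlation (e.g. corr 0.5 -> 0.5 * ln(3) ≈ 0.549, and
# tanh(0.549) ≈ 0.5).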


def get_tranche_data(conn, index_type, tenor="5yr"):
    """Load tranche risk numbers and derive loss-adjusted strikes and moneyness."""
    sql_string = (
        "SELECT * FROM risk_numbers "
        "LEFT JOIN index_version USING (index, series, version) "
        "WHERE index = %s AND tenor=%s"
    )
    df = pd.read_sql_query(
        sql_string,
        conn,
        parse_dates={"date": {"utc": True}},
        params=(index_type, tenor),
    )
    del df["basketid"]
    # Timestamps come back in UTC; convert to New York calendar dates.
    df.date = (
        df.date.dt.tz_convert("America/New_York").dt.tz_localize(None).dt.normalize()
    )
    # Average duplicate marks for the same tranche and day.
    df = df.groupby(
        ["date", "index", "series", "version", "tenor", "attach"], as_index=False
    ).mean(numeric_only=True)
    df = df.assign(
        # Tranche share of the whole index's expected loss.
        exp_percentage=lambda x: x.expected_loss / x.index_expected_loss,
        # Strikes adjusted for realized losses and the current index factor.
        attach_adj=lambda x: np.maximum(
            (x.attach - x.cumulativeloss) / x.indexfactor, 0
        ),
        detach_adj=lambda x: np.minimum(
            (x.detach - x.cumulativeloss) / x.indexfactor, 1
        ),
        # Midpoint of the adjusted tranche relative to index expected loss.
        moneyness=lambda x: (x.detach_adj + x.attach_adj) / 2 / x.index_expected_loss,
    )
    df = df.set_index(["date", "index", "series", "version", "tenor", "attach"])
    series = tuple(df.index.get_level_values("series").unique())
    # psycopg2 adapts the Python tuple for the IN clause.
    dispersion = pd.read_sql_query(
        "SELECT date, index, series, version, tenor, dispersion, gini FROM index_quotes "
        "WHERE index=%s AND series IN %s AND tenor=%s",
        conn,
        params=(index_type, series, tenor),
        index_col=["date", "index", "series", "version", "tenor"],
    )
    df = df.join(dispersion)
    return df


def create_models(conn, df) -> tuple[pd.DataFrame, sm.OLS]:
    """Model each tranche's share of the index expected loss.

    Takes the output of get_tranche_data. The most senior tranche is
    excluded from the fit and instead receives the residual share, so the
    predicted shares sum to one across the capital structure.
    """
    attach_max = df.index.get_level_values("attach").max()
    bottom_stack = df[df.index.get_level_values("attach") != attach_max]
    # Logistic-transformed loss share regressed on index duration, index
    # expected loss, moneyness and dispersion (patsy evaluates logit and
    # np.* inside the formula string).
    model = smf.ols(
        "logit(exp_percentage) ~ np.log(index_duration) + "
        "I(np.log(index_expected_loss)**2) + "
        "np.log(moneyness)*dispersion + "
        "np.log(index_expected_loss)*dispersion + "
        "I(np.log(moneyness)**2) + I(np.log(moneyness)**3)",
        data=bottom_stack,
    )
    f = model.fit()
    # Map fitted values back from logit space to loss shares.
    df.loc[df.index.get_level_values("attach") != attach_max, "predict"] = expit(
        f.predict(bottom_stack)
    )

    def aux(s):
        # Give the residual share to the most senior tranche so the shares
        # sum to one within each (index, series, date) group.
        temp = s.to_numpy(copy=True)
        temp[-1] = 1 - temp[:-1].sum()
        return temp

    df["predict"] = df.groupby(["index", "series", "date"])["predict"].transform(aux)
    return df, model
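

# A minimal sketch of how the pieces above compose (same objects as in the
# __main__ block below; the selected output columns are assumptions from the
# code, not a documented interface):
#
#   engine = dbengine("serenitasdb")
#   tranche_df = get_tranche_data(engine, "IG")
#   tranche_df, loss_model = create_models(engine, tranche_df)
#   tranche_df[["exp_percentage", "predict"]].head()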


if __name__ == "__main__":
    index_type = "HY"
    series = 29
    serenitas_engine = dbengine("serenitasdb")
    # get_dispersion is assumed to be available in this module's namespace;
    # it is not defined or imported above.
    dispersion = get_dispersion(index_type, series)
    df = get_corr_data(index_type, series, serenitas_engine)
    df = df.join(dispersion)
    if index_type == "HY":
        formula = "fisher ~ np.log(dispersion) + cumloss + np.log(index_duration)"
    else:
        formula = (
            "fisher ~ np.log(dispersion) + np.log(indexrefspread) + "
            "np.log(index_duration)"
        )
    # Fit the base-correlation model and report the regression.
    mod = smf.ols(formula=formula, data=df)
    res = mod.fit()
    print(res.summary())