1 files changed, 56 insertions, 0 deletions
diff --git a/python/exploration/dispersion.py b/python/exploration/dispersion.py
index e4e1c1e6..f4c7a076 100644
--- a/python/exploration/dispersion.py
+++ b/python/exploration/dispersion.py
@@ -67,6 +67,62 @@ def get_corr_data(index_type, series, engine):
     return df
 
 
+def get_tranche_data(index_type, engine):
+    """Load averaged risk numbers for *index_type* and add a moneyness column.
+
+    Risk numbers are averaged per (date, index, series, tenor, attach) and
+    joined to the first index version of the same series whose ``lastdate``
+    is on or after the row's date; the result is indexed by date.
+    """
+    idx_ver = pd.read_sql_query(
+        "select * from index_version where index = %s",
+        engine,
+        params=[index_type],
+        parse_dates=["lastdate"],
+    )
+    # A missing lastdate becomes a far-future date so the forward match finds it.
+    far_future = pd.Timestamp(2050, 1, 1)
+    idx_ver["date"] = idx_ver["lastdate"].dt.normalize().fillna(far_future)
+    df = pd.read_sql_query(
+        "select * from risk_numbers where index = %s",
+        engine,
+        params=[index_type],
+        parse_dates={"date": {"utc": True}},
+    )
+    df["exp_percentage"] = df["expected_loss"] / df["index_expected_loss"]
+    df["date"] = df["date"].dt.normalize().dt.tz_convert(None)
+    keys = ["date", "index", "series", "tenor", "attach"]
+    # groupby returns rows ordered by date first, which merge_asof requires.
+    df = df.groupby(keys).mean().reset_index()
+    versions = idx_ver[["date", "series", "cumulativeloss", "indexfactor"]]
+    df = pd.merge_asof(
+        df, versions.sort_values("date"), on="date", by="series", direction="forward"
+    )
+    df = df.set_index("date")
+    # Whole-column arithmetic replaces the slower row-wise apply.
+    loss_cushion = df["detach"] - df["cumulativeloss"]
+    df["moneyness"] = loss_cushion / df["indexfactor"] / df["index_expected_loss"]
+    return df
+
+
+def gini(array):
+    """Return the Gini coefficient of *array* without mutating the input."""
+    values = np.array(array, dtype=float).ravel()  # private float copy of the input
+    if values.min() < 0:
+        values -= values.min()  # values cannot be negative
+    values += 0.0000001  # values cannot be 0
+    values.sort()  # the rank weights below assume ascending order
+    n = values.shape[0]  # number of array elements
+    return np.sum((2 * np.arange(1, n + 1) - n - 1) * values) / (n * np.sum(values))
+
+
+def get_gini_spreadstdev(index_type, series, tenor, date):
+    """Return (Gini coefficient, std dev) of the index spreads below 1."""
+    quotes = MarkitBasketIndex(index_type, series, tenor, value_date=date).spreads()
+    below_one = quotes[quotes < 1]
+    return gini(below_one), np.std(below_one)
+
+
 if __name__ == "__main__":
     index_type = "HY"
     series = 29