import datetime

import bottleneck as bn
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

from analytics import CreditIndex
from analytics.basket_index import MarkitBasketIndex
from utils.db import dbengine


def get_dispersion(index_type, series, end_date=None):
    """Daily cross-sectional dispersion of log spreads in an index basket."""
    # A default of datetime.date.today() would be evaluated once at import
    # time and go stale; resolve it at call time instead.
    if end_date is None:
        end_date = datetime.date.today()
    index = MarkitBasketIndex(index_type, series, ["5yr"])
    dr = pd.bdate_range(index.issue_date, end_date)
    dispersion = []
    for d in dr:
        index.value_date = d
        dispersion.append(bn.nanstd(np.log(index.spreads())))
    return pd.Series(dispersion, index=dr, name="dispersion")


def add_cumloss(df, index_type, series, engine):
    """Join the cumulative loss of the prevailing index version onto df."""
    cumloss = pd.read_sql_query(
        "SELECT lastdate, cumulativeloss AS cumloss FROM index_version "
        "WHERE index=%s AND series=%s ORDER BY lastdate",
        engine,
        params=(index_type, series),
        parse_dates=["lastdate"],
    )
    # The current version is still open-ended; push its lastdate to the far
    # future so the backfill below covers all observation dates.
    cumloss.iat[-1, 0] = pd.Timestamp.max
    cumloss = (
        cumloss.set_index("lastdate").sort_index().reindex(df.index, method="bfill")
    )
    return df.join(cumloss)


def get_corr_data(index_type, series, engine):
    """Tranche correlation quotes joined with index-level risk measures."""
    sql_str = (
        "SELECT quotedate::date, indexrefspread, indexrefprice, index_duration, "
        "index_expected_loss, corr_at_detach "
        "FROM tranche_risk JOIN tranche_quotes "
        "ON tranche_risk.tranche_id=tranche_quotes.id "
        "WHERE index=%s AND series=%s AND tenor='5yr' AND detach=%s "
        "ORDER BY quotedate DESC"
    )
    df = pd.read_sql_query(
        sql_str,
        engine,
        params=(index_type, series, 3 if index_type == "IG" else 15),
        index_col=["quotedate"],
        parse_dates=["quotedate"],
    )
    if index_type == "HY":
        # HY quotes reference price rather than spread; convert the reference
        # price to its spread equivalent so both index families are comparable.
        spread_equivalent = []
        index = CreditIndex(index_type, series, "5yr")
        for k, v in df.iterrows():
            index.value_date = k
            index.ref = v["indexrefprice"]
            spread_equivalent.append(index.spread)
        df["indexrefspread"] = spread_equivalent
    # Fisher z-transform of the correlation at the detachment point.
    df = df.assign(
        fisher=lambda x: 0.5 * np.log((1 + x.corr_at_detach) / (1 - x.corr_at_detach))
    )
    return df


def get_tranche_data(index_type, engine):
    """Tranche risk numbers merged with version data as of each quote date."""
    idx_ver = pd.read_sql_query(
        "SELECT * FROM index_version WHERE index = %s",
        engine,
        params=[index_type],
        parse_dates=["lastdate"],
    )
    # Open-ended (current) versions get a far-future date so that merge_asof
    # below can still match them.
    idx_ver["date"] = pd.to_datetime(
        [
            d.strftime("%Y-%m-%d") if not pd.isnull(d) else datetime.date(2050, 1, 1)
            for d in idx_ver["lastdate"]
        ]
    )
    df = pd.read_sql_query(
        "SELECT * FROM risk_numbers WHERE index = %s",
        engine,
        parse_dates={"date": {"utc": True}},
        params=[index_type],
    )
    df["exp_percentage"] = df["expected_loss"] / df["index_expected_loss"]
    df.date = df.date.dt.normalize().dt.tz_convert(None)
    # Average duplicate quotes per tranche and day.
    df = df.groupby(["date", "index", "series", "tenor", "attach"]).mean()
    df.reset_index(inplace=True)
    idx_ver.sort_values("date", inplace=True)
    # Attach the version in effect on each quote date (next version boundary
    # at or after the quote date).
    df = pd.merge_asof(
        df,
        idx_ver[["date", "series", "cumulativeloss", "indexfactor"]],
        on="date",
        by="series",
        direction="forward",
    )
    df.set_index("date", inplace=True)
    # Moneyness: remaining tranche width relative to remaining expected loss.
    df["moneyness"] = (
        (df.detach - df.cumulativeloss) / df.indexfactor / df.index_expected_loss
    )
    return df


def gini(array):
    """Calculate the Gini coefficient of a numpy array."""
    # Work on a float copy so the caller's array is not mutated in place.
    array = np.asarray(array, dtype=float).copy()
    if np.amin(array) < 0:
        array -= np.amin(array)  # values cannot be negative
    array += 0.0000001  # values cannot be 0
    array = np.sort(array)  # values must be sorted
    index = np.arange(1, array.shape[0] + 1)  # index per array element
    n = array.shape[0]  # number of array elements
    return np.sum((2 * index - n - 1) * array) / (n * np.sum(array))


def get_gini_spreadstdev(index_type, series, tenor, date):
    """Gini coefficient and standard deviation of basket spreads on a date."""
    indices = MarkitBasketIndex(index_type, series, tenor, value_date=date)
    spreads = indices.spreads()
    # Drop extreme quotes (spreads of 100% or more, i.e. distressed names).
    spreads = spreads[spreads < 1]
    return gini(spreads), np.std(spreads)


if __name__ == "__main__":
    index_type = "HY"
    series = 29
    serenitas_engine = dbengine("serenitasdb")

    dispersion = get_dispersion(index_type, series)
    df = get_corr_data(index_type, series, serenitas_engine)
    df = df.join(dispersion)
    # HY regresses on cumulative loss; IG uses the index reference spread.
    if index_type == "HY":
        df = add_cumloss(df, index_type, series, serenitas_engine)
        formula = "fisher ~ np.log(dispersion) + cumloss + np.log(index_duration)"
    else:
        formula = (
            "fisher ~ np.log(dispersion) + np.log(indexrefspread) "
            "+ np.log(index_duration)"
        )
    mod = smf.ols(formula=formula, data=df)
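    # Sketch (not in the original, which stops at model construction): fit the
    # regression and print the estimates. Assumes the intent is to inspect the
    # coefficients on dispersion and the other regressors.
    res = mod.fit()
    print(res.summary())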