import cvxpy
import datetime
import math
import numpy as np
import pandas as pd

from pandas.tseries.offsets import BDay
from arch import arch_model
from utils.db import dbengine, dbconn
from scipy.interpolate import interp1d
from analytics import CreditIndex, ForwardIndex
from analytics.index_data import index_returns

serenitasdb = dbengine("serenitasdb")


def realized_vol(
    index, series=None, tenor="5yr", from_date=None, years=3, return_type="spread"
):
    """computes the realized spread volatility"""
    returns = index_returns(None, index, series, tenor, from_date, years)
    returns = returns.groupby(level="date").nth(-1).dropna()
    # GARCH(1,1) volatility process with constant mean, scale to help with fitting
    scale = 10
    am = arch_model(scale * returns[f"{return_type}_return"])
    res = am.fit(update_freq=0, disp="off")
    return (res.conditional_volatility * math.sqrt(252) / scale, res)


def lr_var(res):
    r""" computes long run variance of the garch process

    .. math::

    \sigma^2=\frac{\omega}{1-\sum_{i=1}^p \alpha_i + \sum_{i=1}^q \beta_i}

    """
    names = res.model.volatility.parameter_names()
    ## names[0] is omega, rest is alpha[1],..., alpha[p], beta[1],...,beta[q]
    var = res.params[names[0]] / (1 - res.params[names[1:]])
    return math.sqrt(var * 252)


def atm_vol_calc(df, index_type, moneyness):
    df = df.set_index("ref", append=True)
    r = np.empty((len(df.index.unique()), 3))
    i = 0
    index_keys = []
    for s, g1 in df.groupby(level="series"):
        index = CreditIndex(index_type, s, "5yr", value_date=g1.index[0][0])
        for date, g2 in g1.groupby(pd.Grouper(level="quotedate", freq="D")):
            if not g2.empty:
                index.value_date = date.date()
            for (ref, expiry), g3 in g2.reset_index("expiry").groupby(
                ["ref", "expiry"]
            ):
                index.ref = ref
                atm_val = forward_spread = ForwardIndex(
                    index, expiry, False
                ).forward_spread
                otm_val = atm_val * (1 + moneyness)
                if index._quote_is_price:
                    index.spread = atm_val
                    atm_val = index.price
                    index.spread = otm_val
                    otm_val = index.price
                for quotedate, v in g3.groupby(level="quotedate"):
                    f = interp1d(
                        v.strike.values, v.vol.values, fill_value="extrapolate"
                    )
                    r[i, 0] = forward_spread
                    r[i, 1:] = f([atm_val, otm_val])
                    i += 1
                    index_keys.append((quotedate, expiry, s))
    df = pd.DataFrame(
        data=r,
        index=pd.MultiIndex.from_tuples(
            index_keys, names=["quotedate", "expiry", "series"]
        ),
        columns=["forward_spread", "atm_vol", "otm_vol"],
    )
    df["T"] = df.index.get_level_values("expiry").values.astype(
        "datetime64[D]"
    ) - df.index.get_level_values("quotedate").values.astype("datetime64[D]")
    df["T"] = df["T"].dt.days / 365
    return df


def atm_vol(index, date, series=None, moneyness=0.2):
    extra_filter = ""
    params = (index.upper(), date)
    if series:
        extra_filter = " AND series=%s"
        params = params + (series,)
    sql_str = (
        "SELECT * from swaption_ref_quotes JOIN swaption_quotes "
        "USING (ref_id) WHERE index=%s "
        f"and quotedate>=%s {extra_filter} ORDER BY quotedate ASC"
    )

    df = pd.read_sql_query(sql_str, serenitasdb, params=params)
    df.quotedate = pd.to_datetime(df.quotedate, utc=True).dt.tz_convert(
        "America/New_York"
    )
    df = df.set_index(["quotedate", "index", "series", "expiry"])
    df = df.groupby(level=["quotedate", "index", "series", "expiry"]).filter(
        lambda x: len(x) > 2
    )
    return atm_vol_calc(df, index, moneyness)


def rolling_vol(df, col="atm_vol", term=[3]):
    """compute the rolling volatility for various terms"""
    df = df.reset_index(level=["expiry", "series"])
    df = df.groupby(df.index).filter(lambda x: len(x) > 2)

    def aux(s, col, term):
        k = s.index[0]
        f = interp1d(
            s.expiry.values.astype("float"), s[col].values, fill_value="extrapolate"
        )
        x = np.array(
            [
                (k + pd.DateOffset(months=t)).to_datetime64().astype("float")
                for t in term
            ]
        )
        return pd.Series(f(x), index=[str(t) + "m" for t in term])

    df = df.groupby(level="quotedate").apply(aux, col, term)
    # MS quotes don't have fwdspread so they end up as NA
    return df.dropna()


def vol_var(percentile=0.975, index="IG", start_date=datetime.date(2014, 6, 11)):
    """compute lo and hi percentiles of atm volatility daily change

    Probably overestimates:
    - we don't check that the quotes come from the same dealer
    - we should group it by series
    """
    df = atm_vol(index, start_date)
    df = rolling_vol(df, term=[1, 2, 3])
    df = df.sort_index()
    df = df.groupby(df.index.date).nth(-1)
    return df.diff().quantile([1 - percentile, percentile])


def get_index_spread(index, series, date, conn):
    with conn.cursor() as c:
        c.execute(
            "SELECT closespread from index_quotes "
            "WHERE index=%s and series=%s and date=%s and tenor='5yr'",
            (index, series, date),
        )
        try:
            spread, = c.fetchone()
        except TypeError:
            spread = None
    conn.commit()
    return spread


def get_index_ref(index, series, date, expiry, conn):
    with conn.cursor() as c:
        c.execute(
            "SELECT ref, fwdspread from swaption_ref_quotes "
            "WHERE index=%s and series=%s and quotedate::date=%s "
            "AND expiry=%s ORDER BY quotedate desc",
            (index, series, date, expiry),
        )
        try:
            ref, fwdspread = c.fetchone()
        except TypeError:
            ref, fwdspread = None, None
    conn.commit()
    return ref, fwdspread


def get_option_pnl(strike, expiry, index, series, start_date, engine):
    for s in [strike, strike + 2.5, strike - 2.5, strike + 5]:
        df = pd.read_sql_query(
            "SELECT quotedate, (pay_bid+pay_offer)/2 AS pay_mid, "
            "(rec_bid+rec_offer)/2 AS rec_mid FROM swaption_quotes "
            "WHERE strike=%s and expiry=%s and index=%s and series=%s"
            "and quotedate>=%s",
            engine,
            params=(s, expiry, index, series, start_date),
            index_col="quotedate",
            parse_dates=["quotedate"],
        )
        if not df.empty and df.index[0].date() == start_date:
            strike = s
            break
    else:
        raise ValueError("Couldn't find data starting from that date")

    if not pd.api.types.is_datetime64tz_dtype(df.index):
        df.index = df.index.tz_localize("utc")

    df = df.groupby(df.index.normalize()).nth(-1)
    if expiry < datetime.date.today():
        spread = get_index_spread(index, series, expiry, engine.raw_connection())
        underlying = Index.from_name(index, series, "5yr", expiry, 1e4)
        underlying.spread = spread
        pv = underlying.pv
        underlying.spread = strike
        if spread > strike:
            pay_mid, rec_mid = pv - underlying.pv, 0
        else:
            pay_mid, rec_mid = 0, underlying.pv - pv
            pv = underlying.pv
        df = df.append(
            pd.DataFrame(
                [[pay_mid, rec_mid]],
                columns=["pay_mid", "rec_mid"],
                index=[pd.Timestamp(expiry, tz="UTC")],
            )
        )
    return df, strike


def sell_vol_strategy(index="IG", months=3):
    conn = serenitasdb.raw_connection()
    with conn.cursor() as c1, conn.cursor() as c2:
        c1.execute(
            "SELECT DISTINCT series, expiry FROM "
            "swaption_quotes ORDER BY expiry, series desc"
        )
        d = {}
        for series, expiry in c1:
            start_date = BDay().rollback(expiry - pd.DateOffset(months=months)).date()
            if start_date > datetime.date.today():
                break
            c2.execute(
                "SELECT max(quotedate::date) FROM swaption_quotes WHERE "
                "index=%s AND series=%s AND expiry=%s AND quotedate<=%s",
                (index, series, expiry, start_date),
            )
            actual_start_date, = c2.fetchone()
            if actual_start_date is None or (start_date - actual_start_date).days > 5:
                continue
            ref, fwdspread = get_index_ref(
                index, series, actual_start_date, expiry, conn
            )
            if fwdspread is None:
                fwdspread = ref + months / 50  # TODO: use actual values
            strike = round(fwdspread / 2.5) * 2.5
            pnl, strike = get_option_pnl(
                strike, expiry, index, series, actual_start_date, engine
            )
            d[(series, strike, expiry)] = pnl
    conn.commit()
    return d


def aggregate_trades(d):
    r = pd.Series()
    for v in d.values():
        r = r.add(-v.sum(1).diff().dropna(), fill_value=0)
    return r


def compute_allocation(df):
    Sigma = df.cov().values
    gamma = cvxpy.Parameter(sign="positive")
    mu = df.mean().values
    w = cvxpy.Variable(3)
    ret = mu.T * w
    risk = cvxpy.quad_form(w, Sigma)
    prob = cvxpy.Problem(
        cvxpy.Maximize(ret - gamma * risk), [cvxpy.sum_entries(w) == 1, w >= -2, w <= 2]
    )

    gamma_x = np.linspace(0, 0.02, 500)
    W = np.empty((3, gamma_x.size))
    for i, val in enumerate(gamma_x):
        gamma.value = val
        prob.solve()
        W[:, i] = np.asarray(w.value).squeeze()

    fund_return = mu @ W
    fund_vol = np.array(
        [math.sqrt(W[:, i] @ Sigma @ W[:, i]) for i in range(gamma_x.size)]
    )
    return (W, fund_return, fund_vol)


if __name__ == "__main__":
    # d1 = sell_vol_strategy(months=1)
    # d2 = sell_vol_strategy(months=2)
    # d3 = sell_vol_strategy(months=3)
    # all_tenors = pd.concat([aggregate_trades(d) for d in [d1, d2, d3]], axis=1)
    # all_tenors.columns = ['1m', '2m', '3m']
    # all_tenors['optimal'] = ((1.2*all_tenors['1m']).
    #                          sub(1.2*all_tenors['2m'], fill_value=0).
    #                          add(all_tenors['3m'], fill_value=0))
    import datetime
    import statsmodels.formula.api as smf

    ## HY
    df = atm_vol("HY", datetime.date(2017, 3, 20))
    df["forward_spread"] *= 1e-4
    df["log_forward_spread"] = np.log(df["forward_spread"])
    df["log_atm_vol"] = np.log(df["atm_vol"])
    df_hy28 = df.xs(28, level="series")
    results = smf.ols("log_atm_vol ~ log_forward_spread + T", data=df_hy28).fit()
    beta_hy28 = 1 + results.params.log_forward_spread
    print(results.summary())

    ## IG
    df = atm_vol("IG", datetime.date(2017, 3, 20))
    df["forward_spread"] *= 1e-4
    df["log_forward_spread"] = np.log(df["forward_spread"])
    df["log_atm_vol"] = np.log(df["atm_vol"])
    df_ig28 = df.xs(28, level="series")
    results = smf.ols("log_atm_vol ~ log_forward_spread + T", data=df_ig28).fit()
    beta_ig28 = 1 + results.params.log_forward_spread
    print(results.summary())