import datetime

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
import statsmodels.api as sm
from statsmodels.formula.api import gls

import globeop_reports as ops
from utils.db import serenitas_engine, dawn_engine, DataError


def get_mark_df(asset_class="Subprime"):
    """Pull external marks and join them to current notionals from risk_positions."""
    # The cutoff used to be >1000 but as of 12/17/2018 it was changed to 3000:
    # marks above 3000 are bad data and are filtered out.
    df_external_marks = pd.read_sql_query(
        "select * from external_marks_mapped where mark < 3000", dawn_engine
    )
    frames = []
    for date in df_external_marks.date.unique():
        df_temp = pd.read_sql_query(
            "select description, identifier, "
            "usd_market_value/price*100 as curr_ntl "
            "from risk_positions(%s, %s) "
            "where price > 0 and length(identifier) = 9 ",
            dawn_engine,
            params=[date, asset_class],
        )
        df_temp["date"] = date
        frames.append(df_temp)
    df_trades = pd.concat(frames, ignore_index=True)
    df = df_trades.merge(df_external_marks).dropna()
    return df.set_index(["date", "identifier"])


def calc_mark_diff(df=None, sources=None):
    """Dollar difference between alternative mark aggregates and the manager mark, by date."""
    if sources is None:
        sources = [
            "PRICESERVE",
            "PRICINGDIRECT",
            "BVAL",
            "MARKIT",
            "BROKER",
            "REUTERS",
            "S&P",
            "IDC",
        ]
    if df is None:
        df = get_mark_df()

    # All sources (including the manager's own mark): mean, manager-only, median, closest.
    g = df.groupby(level=["date", "identifier"])
    diff = g.mean(numeric_only=True)
    diff = diff.join(df[df.source == "MANAGER"]["mark"], rsuffix="_manager")
    diff = diff.join(g.median(numeric_only=True)["mark"], rsuffix="_median_all")
    temp = g.apply(closest).rename("mark_closest_all")
    diff = diff.join(temp)

    # Filtered sources only: mean, median, and mean with the max and min removed.
    df_filtered = df[df.source.isin(sources)]
    g1 = df_filtered.groupby(level=["date", "identifier"])
    diff = diff.join(g1.mean(numeric_only=True)["mark"], rsuffix="_filtered_mean")
    diff = diff.join(g1.median(numeric_only=True)["mark"], rsuffix="_filtered_median")
    diff = diff.join(g1.mark.apply(remove_max_min), rsuffix="_filtered_no_max_min")

    # Convert marks to dollar values, then take differences versus the manager mark;
    # a negative number means Serenitas marks higher.
    diff = diff.multiply(diff.curr_ntl / 100, axis="index")
    del diff["curr_ntl"]
    diff = diff.rename(columns={"mark": "mark_mean_all"})
    diff = diff.apply(lambda x: (x - x.mark_manager), axis=1)
    return diff.groupby(level="date").sum()


def closest(x):
    """Within one (date, identifier) group, return the mark closest to the manager mark."""
    x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"]).fillna(0)
    return x.mark[x.dist == x.dist.min()].iloc[0]


def remove_max_min(x):
    """Mean of the marks excluding the max and min, when at least four quotes exist."""
    if x.count() >= 4:
        return (x.sum() - x.max() - x.min()) / (x.count() - 2)
    else:
        return x.mean()


def diff_by_source(df=None):
    """From get_mark_df() output, compute the dollar difference to the manager mark by source."""
    if df is None:
        df = get_mark_df()
    df = df.drop(columns="description")
    df = df.set_index(["source"], append=True).apply(
        lambda x: x.curr_ntl * x.mark / 100, axis=1
    )
    df = df.groupby(level=["date", "identifier", "source"]).mean()
    df = df.unstack(-1).apply(lambda x: (x - x.MANAGER), axis=1)
    return df.groupby(level="date").sum()


def alt_navs(df=None):
    """Build alternative return and NAV series as if the fund were priced off each mark aggregate."""
    navs = ops.get_net_navs().sort_index()
    if df is None:
        df = get_mark_df()
    df = calc_mark_diff(df)
    end_nav, beg_nav, returns, nav_100 = (
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(index=df.index, columns=df.columns),
    )
    for col in df.columns:
        end_nav[col] = df[col] + navs.endbooknav
        beg_nav[col] = end_nav[col].shift(1) + navs.net_flow.shift(1)
        beg_nav.loc[beg_nav.index[0], col] = 12500000
        returns[col] = (end_nav[col] - navs.incentive_fee) / beg_nav[col] - 1
    # Chain the returns into an index that starts at 100.
    for i, _ in returns.dropna().reset_index().iterrows():
        nav_100.iloc[i] = 100 if i == 0 else nav_100.iloc[i - 1]
        nav_100.iloc[i] = nav_100.iloc[i] * (1 + returns.iloc[i])
    return returns, nav_100
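
# Illustrative usage of the mark-diff pipeline above (a sketch, not part of the
# module's API): the variable names below are examples only, and running this
# requires access to the dawn database and globeop_reports.
#
#     df_marks = get_mark_df(asset_class="Subprime")
#     dollar_diff = calc_mark_diff(df_marks)    # per-date $ impact vs. the manager marks
#     by_source = diff_by_source(df_marks)      # same impact, broken out by pricing source
#     returns, nav_100 = alt_navs(df_marks)     # alternative return and NAV-of-100 paths
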
def back_test(
    begindate="2013-01-01", enddate=datetime.date.today(), sell_price_threshold=200
):
    """Compare realized sale prices against the external marks available at the time of sale."""
    sql_string = "SELECT * FROM external_marks_mapped WHERE source IS NOT NULL"
    df = pd.read_sql_query(
        sql_string,
        dawn_engine,
        parse_dates=["date"],
        index_col=["date", "identifier", "source"],
    )
    df = pd.pivot_table(
        df.reset_index(), values="mark", index=["identifier", "date"], columns="source"
    )
    sql_string = (
        "select trade_date, identifier, price from bonds "
        "where buysell = %s and asset_class = %s"
    )
    df_trades = pd.read_sql_query(
        sql_string, dawn_engine, parse_dates=["trade_date"], params=[False, "Subprime"]
    )
    # Attach to each sale the most recent set of marks as of the trade date.
    df = pd.merge_asof(
        df_trades.sort_values("trade_date"),
        df.reset_index().sort_values("date"),
        left_on="trade_date",
        right_on="date",
        by="identifier",
    ).drop(columns="date")
    df = df.set_index(["trade_date", "identifier", "price"]).stack()
    df = df.reset_index().rename(columns={"level_3": "source", 0: "mark"})
    df["difference"] = (df["price"] - df["mark"]) / df["mark"]
    # Filtering: valid 9-character identifiers, sensible sale prices, and the requested window.
    df = df[df.identifier.str.len() == 9]
    df = df[df.price < sell_price_threshold]
    df = df[(df["trade_date"] > begindate) & (df["trade_date"] < enddate)]
    df.loc[df.source == "MANAGER", "source"] = "LMCG"
    return df


def stats(df_long, diff_threshold=5):
    """Per-source GLS fits of price on mark, plus mean and standard deviation of the differences."""
    g = df_long[df_long.difference < diff_threshold].groupby("source")
    # Fit all the per-source models at once.
    params = g.apply(lambda grp: gls("price~mark", grp).fit().params)
    error = pd.DataFrame([g.difference.mean(), g.difference.std()])
    error.index = ["average", "standard deviation"]
    return params, error


def pretty_plot(df_long):
    """Scatter sale price against mark, one color per pricing source."""
    # plt.switch_backend('Agg')
    sb.set_style("whitegrid")
    sb.set_context("notebook")
    order = [
        "LMCG",
        "BROKER",
        "BVAL",
        "IDC",
        "MARKIT",
        "PRICESERVE",
        "PRICINGDIRECT",
        "REUTERS",
        "S&P",
    ]
    sb.set_palette(sb.hls_palette(10, l=0.4, s=0.8))
    grid = sb.FacetGrid(
        df_long,
        hue="source",
        hue_kws={
            "s": [50] + [20] * 9,
            "marker": ["o"] + ["s"] * 9,
            "alpha": [1] + [0.4] * 9,
        },
        legend_out=True,
        aspect=2.1,
        height=4,
        hue_order=order,
    )
    grid.set(ylim=(0, 105), xlim=(0, 105))
    grid = grid.map(plt.scatter, "mark", "price").add_legend()
    grid.set_axis_labels("Mark", "sale price")
    # 45-degree reference line: points above it sold for more than the mark.
    plt.plot([100, 0], [100, 0], color="black", lw=2, linestyle="solid")
    grid.fig.savefig("/home/serenitas/edwin/PythonGraphs/backtest.png")
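

# Minimal driver sketch, assuming this module is run as a script with database
# access and a writable output path for the plot; the thresholds below simply
# restate the function defaults above.
if __name__ == "__main__":
    df_long = back_test(begindate="2013-01-01", sell_price_threshold=200)
    regression_params, error_summary = stats(df_long, diff_threshold=5)
    print(regression_params)
    print(error_summary)
    pretty_plot(df_long)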