import pandas as pd import matplotlib import numpy as np import matplotlib.pyplot as plt import statsmodels.api as sm import seaborn as sb import datetime import globeop_reports as ops from statsmodels.formula.api import gls from serenitas.utils.db import serenitas_engine, dawn_engine, DataError def get_mark_df(asset_class="Subprime"): # It used to be >1000 but as of 12/17/2018 changed it to 3000 # Filter marks >3000 where the marks are weird... df_external_marks = pd.read_sql_query( "select * from external_marks_mapped where mark < 3000", dawn_engine ) df_trades = pd.DataFrame() for date in df_external_marks["date"].unique(): df_temp = pd.read_sql_query( "select description, identifier, " "usd_market_value/price*100 as curr_ntl " "from risk_positions(%s, %s) " "where price >0 and length(identifier) = 9 ", dawn_engine, params=[date, asset_class], ) df_temp["date"] = date df_trades = df_trades.append(df_temp) df = df_trades.merge(df_external_marks).dropna() return df.set_index(["date", "identifier"]) def calc_mark_diff(df, source_remove=["PB"]): # use get_mark_df df = df[~df.source.isin(source_remove)] df = ( df.set_index("source", append=True) .groupby(level=["date", "identifier", "source"]) .mean() ) marks = df["mark"].unstack(-1).dropna(subset=["MANAGER"]) def remove_max_min(x): x = x.dropna() return ( (x.sum() - x.max() - x.min()) / (x.count() - 2) if x.count() >= 4 else x.mean() ) def pick_closest(x): dist = abs(x.dropna() - x["MANAGER"]) if dist.count() > 1: dist.drop("MANAGER", inplace=True) return x[dist.idxmin] def mean_with_default(x): return ( x["MANAGER"] if x.drop("MANAGER").count() == 0 else x.drop("MANAGER").mean() ) marks = pd.concat( [ marks.apply(mean_with_default, axis=1), marks.apply(remove_max_min, axis=1), marks.apply(pick_closest, axis=1), ], keys=["mean_vendor", "no_min_max", "closest_vendor"], axis=1, ) marks = marks.subtract(df.xs("MANAGER", level="source")["mark"], axis=0) marks = ( marks.multiply(df["curr_ntl"].groupby(["date", "identifier"]).mean(), axis=0) / 100 ) marks["manager"] = 0 return marks.groupby(level="date").sum() def diff_by_source(df): # use get_mark_df df = df.drop("description", 1) df = df.set_index(["source"], append=True).apply( lambda x: x.curr_ntl * x.mark / 100, axis=1 ) df = df.groupby(level=["date", "identifier", "source"]).mean() df = df.unstack(-1).apply(lambda x: (x - x.MANAGER), axis=1) return df.groupby(level="date").sum() def alt_navs(df): navs = ops.get_net_navs().sort_index() df = calc_mark_diff(df) end_nav, beg_nav, returns, nav_100 = ( pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(index=df.index, columns=df.columns), ) for col in df.columns: end_nav[col] = df[col] + navs.endbooknav beg_nav[col] = end_nav[col].shift(1) + navs.net_flow.shift(1) beg_nav[col].iloc[0] = 12500000 returns[col] = (end_nav[col] - navs.incentive_fee) / beg_nav[col] - 1 for i, row in returns.dropna().reset_index().iterrows(): nav_100.iloc[i] = 100 if i == 0 else nav_100.iloc[i - 1] nav_100.iloc[i] = nav_100.iloc[i] * (1 + returns.iloc[i]) return returns, nav_100 def back_test( begindate="2013-01-01", enddate=datetime.date.today(), sell_price_threshold=200 ): sql_string = "SELECT * FROM external_marks_mapped WHERE source IS NOT NULL" df = pd.read_sql_query( sql_string, dawn_engine, parse_dates=["date"], index_col=["date", "identifier", "source"], ) df = pd.pivot_table( df, values="mark", index=["identifier", "date"], columns="source" ) sql_string = "select trade_date, identifier, price from bond_trades where buysell=%s and asset_class = %s" df_trades = pd.read_sql_query( sql_string, dawn_engine, parse_dates=["trade_date"], params=[False, "Subprime"] ) df = pd.merge_asof( df_trades.sort_values("trade_date"), df.reset_index().sort_values("date"), left_on="trade_date", right_on="date", by="identifier", ).drop("date", 1) df = df.set_index(["trade_date", "identifier", "price"]).stack() df = df.reset_index().rename(columns={"level_3": "source", 0: "mark"}) df["difference"] = (df["price"] - df["mark"]) / df["mark"] # filtering df = df[df.identifier.str.len() == 9] df = df[df.price < sell_price_threshold] df = df[(df["trade_date"] > begindate) & (df["trade_date"] < enddate)] df.loc[df.source == "MANAGER", "source"] = "LMCG" return df def stats(df_long, diff_threshold=5): g = df_long[df_long.difference < diff_threshold].groupby("source") # fit all the models at once params = g.apply(lambda df: gls("price~mark", df).fit().params) error = pd.DataFrame([g.difference.mean(), g.difference.std()]) error.index = ["average", "standard deviation"] return params, error def pretty_plot(df_long): # plt.switch_backend('Agg') sb.set_style("whitegrid") sb.set_context("notebook") order = [ "LMCG", "BROKER", "BVAL", "IDC", "MARKIT", "PRICESERVE", "PRICINGDIRECT", "REUTERS", "S&P", ] sb.set_palette(sb.hls_palette(10, l=0.4, s=0.8)) grid = sb.FacetGrid( df_long, hue="source", hue_kws={ "s": [50] + [20] * 9, "marker": ["o"] + ["s"] * 9, "alpha": [1] + [0.4] * 9, }, legend_out=True, aspect=2.1, height=4, hue_order=order, ) grid.set(ylim=(0, 105), xlim=(0, 105)) ax = grid.map(plt.scatter, "mark", "price").add_legend() ax.set_axis_labels("Mark", "sale price") plt.plot([100, 0], [100, 0], color="black", lw=2, linestyle="solid") ax.fig.savefig("/home/serenitas/edwin/PythonGraphs/backtest.png")