import datetime

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
import statsmodels.api as sm
from statsmodels.formula.api import gls

import globeop_reports as ops
from utils.db import serenitas_engine, dawn_engine, DataError


def get_mark_df(asset_class="Subprime"):
    """Pull external marks and join them to current notionals from risk_positions."""
    # The cutoff used to be >1000 but as of 12/17/2018 it was changed to 3000:
    # marks above 3000 are bad data and are filtered out.
    df_external_marks = pd.read_sql_query(
        "select * from external_marks_mapped where mark < 3000", dawn_engine
    )
    frames = []
    for date in df_external_marks.date.unique():
        df_temp = pd.read_sql_query(
            "select description, identifier, "
            "usd_market_value/price*100 as curr_ntl "
            "from risk_positions(%s, %s) "
            "where price > 0 and length(identifier) = 9 ",
            dawn_engine,
            params=[date, asset_class],
        )
        df_temp["date"] = date
        frames.append(df_temp)
    df_trades = pd.concat(frames, ignore_index=True)
    df = df_trades.merge(df_external_marks).dropna()
    return df.set_index(["date", "identifier"])


def calc_mark_diff(df=None, sources=None):
    """Dollar difference between alternative mark aggregates and the manager mark, by date."""
    if sources is None:
        sources = [
            "PRICESERVE",
            "PRICINGDIRECT",
            "BVAL",
            "MARKIT",
            "BROKER",
            "REUTERS",
            "S&P",
            "IDC",
        ]
    if df is None:
        df = get_mark_df()

    # All sources (including the manager's own mark): mean, manager-only, median, closest.
    g = df.groupby(level=["date", "identifier"])
    diff = g.mean(numeric_only=True)
    diff = diff.join(df[df.source == "MANAGER"]["mark"], rsuffix="_manager")
    diff = diff.join(g.median(numeric_only=True)["mark"], rsuffix="_median_all")
    temp = g.apply(closest).rename("mark_closest_all")
    diff = diff.join(temp)

    # Filtered sources only: mean, median, and mean with the max and min removed.
    df_filtered = df[df.source.isin(sources)]
    g1 = df_filtered.groupby(level=["date", "identifier"])
    diff = diff.join(g1.mean(numeric_only=True)["mark"], rsuffix="_filtered_mean")
    diff = diff.join(g1.median(numeric_only=True)["mark"], rsuffix="_filtered_median")
    diff = diff.join(g1.mark.apply(remove_max_min), rsuffix="_filtered_no_max_min")

    # Convert marks to dollar values, then take differences versus the manager mark;
    # a negative number means Serenitas marks higher.
    diff = diff.multiply(diff.curr_ntl / 100, axis="index")
    del diff["curr_ntl"]
    diff = diff.rename(columns={"mark": "mark_mean_all"})
    diff = diff.apply(lambda x: (x - x.mark_manager), axis=1)
    return diff.groupby(level="date").sum()


def closest(x):
    """Within one (date, identifier) group, return the mark closest to the manager mark."""
    x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"]).fillna(0)
    return x.mark[x.dist == x.dist.min()].iloc[0]


def remove_max_min(x):
    """Mean of the marks excluding the max and min, when at least four quotes exist."""
    if x.count() >= 4:
        return (x.sum() - x.max() - x.min()) / (x.count() - 2)
    else:
        return x.mean()


def diff_by_source(df=None):
    """From get_mark_df() output, compute the dollar difference to the manager mark by source."""
    if df is None:
        df = get_mark_df()
    df = df.drop(columns="description")
    df = df.set_index(["source"], append=True).apply(
        lambda x: x.curr_ntl * x.mark / 100, axis=1
    )
    df = df.groupby(level=["date", "identifier", "source"]).mean()
    df = df.unstack(-1).apply(lambda x: (x - x.MANAGER), axis=1)
    return df.groupby(level="date").sum()


def alt_navs(df=None):
    """Build alternative return and NAV series as if the fund were priced off each mark aggregate."""
    navs = ops.get_net_navs().sort_index()
    if df is None:
        df = get_mark_df()
    df = calc_mark_diff(df)
    end_nav, beg_nav, returns, nav_100 = (
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(index=df.index, columns=df.columns),
    )
    for col in df.columns:
        end_nav[col] = df[col] + navs.endbooknav
        beg_nav[col] = end_nav[col].shift(1) + navs.net_flow.shift(1)
        beg_nav.loc[beg_nav.index[0], col] = 12500000
        returns[col] = (end_nav[col] - navs.incentive_fee) / beg_nav[col] - 1
    # Chain the returns into an index that starts at 100.
    for i, _ in returns.dropna().reset_index().iterrows():
        nav_100.iloc[i] = 100 if i == 0 else nav_100.iloc[i - 1]
        nav_100.iloc[i] = nav_100.iloc[i] * (1 + returns.iloc[i])
    return returns, nav_100
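
# Illustrative usage of the mark-diff pipeline above (a sketch, not part of the
# module's API): the variable names below are examples only, and running this
# requires access to the dawn database and globeop_reports.
#
#     df_marks = get_mark_df(asset_class="Subprime")
#     dollar_diff = calc_mark_diff(df_marks)    # per-date $ impact vs. the manager marks
#     by_source = diff_by_source(df_marks)      # same impact, broken out by pricing source
#     returns, nav_100 = alt_navs(df_marks)     # alternative return and NAV-of-100 paths
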
def back_test(
    begindate="2013-01-01", enddate=datetime.date.today(), sell_price_threshold=200
):
    """Compare realized sale prices against the external marks available at the time of sale."""
    sql_string = "SELECT * FROM external_marks_mapped WHERE source IS NOT NULL"
    df = pd.read_sql_query(
        sql_string,
        dawn_engine,
        parse_dates=["date"],
        index_col=["date", "identifier", "source"],
    )
    df = pd.pivot_table(
        df.reset_index(), values="mark", index=["identifier", "date"], columns="source"
    )
    sql_string = (
        "select trade_date, identifier, price from bonds "
        "where buysell = %s and asset_class = %s"
    )
    df_trades = pd.read_sql_query(
        sql_string, dawn_engine, parse_dates=["trade_date"], params=[False, "Subprime"]
    )
    # Attach to each sale the most recent set of marks as of the trade date.
    df = pd.merge_asof(
        df_trades.sort_values("trade_date"),
        df.reset_index().sort_values("date"),
        left_on="trade_date",
        right_on="date",
        by="identifier",
    ).drop(columns="date")
    df = df.set_index(["trade_date", "identifier", "price"]).stack()
    df = df.reset_index().rename(columns={"level_3": "source", 0: "mark"})
    df["difference"] = (df["price"] - df["mark"]) / df["mark"]
    # Filtering: valid 9-character identifiers, sensible sale prices, and the requested window.
    df = df[df.identifier.str.len() == 9]
    df = df[df.price < sell_price_threshold]
    df = df[(df["trade_date"] > begindate) & (df["trade_date"] < enddate)]
    df.loc[df.source == "MANAGER", "source"] = "LMCG"
    return df


def stats(df_long, diff_threshold=5):
    """Per-source GLS fits of price on mark, plus mean and standard deviation of the differences."""
    g = df_long[df_long.difference < diff_threshold].groupby("source")
    # Fit all the per-source models at once.
    params = g.apply(lambda grp: gls("price~mark", grp).fit().params)
    error = pd.DataFrame([g.difference.mean(), g.difference.std()])
    error.index = ["average", "standard deviation"]
    return params, error


def pretty_plot(df_long):
    """Scatter sale price against mark, one color per pricing source."""
    # plt.switch_backend('Agg')
    sb.set_style("whitegrid")
    sb.set_context("notebook")
    order = [
        "LMCG",
        "BROKER",
        "BVAL",
        "IDC",
        "MARKIT",
        "PRICESERVE",
        "PRICINGDIRECT",
        "REUTERS",
        "S&P",
    ]
    sb.set_palette(sb.hls_palette(10, l=0.4, s=0.8))
    grid = sb.FacetGrid(
        df_long,
        hue="source",
        hue_kws={
            "s": [50] + [20] * 9,
            "marker": ["o"] + ["s"] * 9,
            "alpha": [1] + [0.4] * 9,
        },
        legend_out=True,
        aspect=2.1,
        height=4,
        hue_order=order,
    )
    grid.set(ylim=(0, 105), xlim=(0, 105))
    grid = grid.map(plt.scatter, "mark", "price").add_legend()
    grid.set_axis_labels("Mark", "sale price")
    # 45-degree reference line: points above it sold for more than the mark.
    plt.plot([100, 0], [100, 0], color="black", lw=2, linestyle="solid")
    grid.fig.savefig("/home/serenitas/edwin/PythonGraphs/backtest.png")
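

# Minimal driver sketch, assuming this module is run as a script with database
# access and a writable output path for the plot; the thresholds below simply
# restate the function defaults above.
if __name__ == "__main__":
    df_long = back_test(begindate="2013-01-01", sell_price_threshold=200)
    regression_params, error_summary = stats(df_long, diff_threshold=5)
    print(regression_params)
    print(error_summary)
    pretty_plot(df_long)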