import pandas as pd
from db import dbengine
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import gls
import seaborn as sb


def get_mark_df(asset_class='Subprime'):
    """Load external marks merged with current notionals per position.

    Parameters
    ----------
    asset_class : str
        Asset class passed to the ``risk_positions`` database function.

    Returns
    -------
    pandas.DataFrame
        Indexed by (date, identifier); one row per (date, identifier, source)
        with ``description``, ``curr_ntl``, ``source`` and ``mark`` columns.
    """
    # Filter marks > 1000 where the marks are weird...
    df_external_marks = pd.read_sql_query(
        "select * from external_marks_mapped where mark < 1000",
        dbengine('dawndb'))
    date_range = df_external_marks.date.unique()
    # Collect per-date frames and concatenate once: DataFrame.append was
    # deprecated in pandas 1.4, removed in 2.0, and quadratic in a loop.
    frames = []
    for date in date_range:
        df_temp = pd.read_sql_query(
            "select description, identifier, usd_market_value/price*100 as curr_ntl "
            "from risk_positions(%s, %s) where price >0 and length(identifier) = 9 ",
            dbengine('dawndb'), params=[date, asset_class])
        df_temp['date'] = date
        frames.append(df_temp)
    df_trades = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df = df_trades.merge(df_external_marks).dropna()
    return df.set_index(['date', 'identifier'])


def calc_mark_diff(df):
    """Compare manager marks against external pricing sources.

    Parameters
    ----------
    df : pandas.DataFrame
        Output of :func:`get_mark_df` — indexed by (date, identifier), with
        ``mark``, ``source`` and ``curr_ntl`` columns.

    Returns
    -------
    tuple
        ``(diff, diff_by_source, count)`` where
        diff : per-date market-value difference of each mark aggregate vs.
            the manager mark (negative means the manager marks higher),
        diff_by_source : per-date MV difference of each source vs. MANAGER,
        count : number of marks contributed by each source per date.
    """
    # --- All sources (including manager...?!) - average, manager mark only,
    # median, closest -------------------------------------------------------
    g = df.groupby(level=['date', 'identifier'])
    # numeric_only=True reproduces the old pandas behaviour of silently
    # dropping the string columns (description/source); newer pandas raises.
    avg_mark = g.mean(numeric_only=True)
    avg_mark = avg_mark.join(df[df.source == 'MANAGER']['mark'],
                             rsuffix='_manager')
    # Selecting the column *before* aggregating avoids the same issue.
    avg_mark = avg_mark.join(g['mark'].median(), rsuffix='_median_all')

    def closest(x):
        # Mark of the source closest to (but different from) the manager mark.
        if x.mark.count() > 1:
            # Local variable instead of `x.dist = ...`, which only set an
            # instance attribute (pandas UserWarning), never a real column.
            dist = (x.mark - x.mark[x.source == 'MANAGER']).abs()
            positive = dist[dist > 0]
            if positive.empty:
                # Every source agrees exactly with the manager; the original
                # crashed (IndexError on empty selection) in this case.
                return x.mark.iloc[0]
            return x.mark[dist == positive.min()].iloc[0]
        # Positional access: the index here is a (date, identifier) tuple, so
        # the original `x.mark[0]` relied on deprecated fallback behaviour.
        return x.mark.iloc[0]

    avg_mark = avg_mark.join(g.apply(closest).rename('mark_closest_all'))

    # Filtered Sources - originally PRICESERVE/PRICINGDIRECT/MARKIT/BVAL,
    # now really it is everything.
    sources = ['PRICESERVE', 'PRICINGDIRECT', 'BVAL', 'MARKIT', 'BROKER',
               'REUTERS', 'S&P', 'PB', 'IDC']
    df_filtered = df[df.source.isin(sources)]
    g1 = df_filtered.groupby(level=['date', 'identifier'])
    # Filtered Sources - average, median, remove max min
    avg_mark = avg_mark.join(g1['mark'].mean(), rsuffix='_filtered')
    avg_mark = avg_mark.join(g1['mark'].median(), rsuffix='_filtered_median')

    def remove_max_min(x):
        # Trimmed mean: drop the single max and min when we have >= 4 marks.
        if x.count() >= 4:
            return (x.sum() - x.max() - x.min()) / (x.count() - 2)
        return x.mean()

    avg_mark = avg_mark.join(g1.mark.apply(remove_max_min),
                             rsuffix='_filtered_no_max_min')

    # calculate difference: negative means Serenitas marks higher
    avg_mark1 = avg_mark.multiply(avg_mark.curr_ntl / 100, axis='index')
    del avg_mark1['curr_ntl']
    diff = avg_mark1.apply(lambda x: (x - x.mark_manager), axis=1)
    # diff = difference to copy to performance number sheet
    diff = diff.groupby(level='date').sum()

    # count of each source
    g2 = df.set_index('source', append=True).groupby(level=['date', 'source'])
    count = g2['mark'].count().unstack(-1)

    # diff_by_source: now calculate the pricing by source.
    # keyword form: the positional axis argument was removed in pandas 2.0.
    dftemp = df.drop(columns='description')
    dftemp = (dftemp.set_index(['source'], append=True)
              .apply(lambda x: x.curr_ntl * x.mark / 100, axis=1))
    dftemp = dftemp.groupby(level=['date', 'identifier', 'source']).mean()
    dftemp = dftemp.unstack(-1).apply(lambda x: (x - x.MANAGER), axis=1)
    diff_by_source = dftemp.groupby(level='date').sum()
    # (the original also computed a per-identifier month-over-month diff here,
    # but never used or returned it — removed as dead code)
    return diff, diff_by_source, count


def back_test():
    """Back-test external marks against realised trade prices.

    Joins each sale/purchase in ``bonds`` with the as-of mark from every
    source, plots mark vs. traded price per direction, then fits per-source
    GLS regressions and PNL-weighted error statistics.

    Side effects: saves backtest_sales.png / backtest_buys.png and draws a
    final PNL chart. Returns None.
    """
    df = pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
                           parse_dates=['date'])
    df = df[df.source.notnull()]
    df_wide = (pd.pivot_table(df, 'mark', ['identifier', 'date'], 'source')
               .reset_index()
               .sort_values('date'))

    df_trades = {}
    df_trades['sell'] = pd.read_sql_query(
        "select trade_date, identifier, faceamount, principal_payment, price "
        "from bonds where buysell = 'f'",
        dbengine('dawndb'), parse_dates=['trade_date'])
    df_trades['buy'] = pd.read_sql_query(
        "select trade_date, identifier, faceamount, principal_payment, price "
        "from bonds where buysell = 't'",
        dbengine('dawndb'), parse_dates=['trade_date'])
    # merge_asof requires both sides sorted on the merge key.
    for direction, trades in df_trades.items():
        df_trades[direction] = trades.sort_values('trade_date')

    # NOTE(review): df_sell_wide / df_buy_wide are never used below — kept
    # for interactive inspection; confirm before deleting.
    df_sell_wide = pd.merge_asof(df_trades['sell'], df_wide,
                                 left_on='trade_date', right_on='date',
                                 by='identifier')
    # NOTE(review): shift(periods=-1, freq='M', axis=1) looks intended to
    # shift the *date index* back one month so buys see the prior month's
    # marks; the axis=1 argument is suspicious — verify against the pinned
    # pandas version before touching.
    df_wide_temp = (df_wide.set_index('date')
                    .shift(periods=-1, freq='M', axis=1).reset_index())
    df_buy_wide = pd.merge_asof(df_trades['buy'], df_wide_temp,
                                left_on='trade_date', right_on='date',
                                by='identifier')

    # Long format: one row per (trade, source) with the as-of mark,
    # tagged with a 'dir' column saying whether it is a buy or a sell.
    d_sell = {}
    d_buy = {}
    for source, g in df.groupby('source'):
        d_sell[source] = pd.merge_asof(df_trades['sell'], g,
                                       left_on='trade_date', right_on='date',
                                       by='identifier')
        d_sell[source]['dir'] = 'sell'
        d_buy[source] = pd.merge_asof(
            df_trades['buy'],
            g.set_index('date').shift(periods=-1, freq='BM', axis=1).reset_index(),
            left_on='trade_date', right_on='date', by='identifier')
        d_buy[source]['dir'] = 'buy'
    # pd.concat replaces the removed DataFrame.append (pandas 2.0).
    df_long = pd.concat([pd.concat(d_sell, join='inner'),
                         pd.concat(d_buy, join='inner')])
    df_long = df_long.reset_index().dropna(subset=['price', 'mark'])
    df_long['difference'] = (df_long['price'] - df_long['mark']) / df_long['mark']

    # filtering
    diff_threshold = 5
    sell_price_threshold = 200
    begindate = '2013-01-01'
    enddate = '2018-01-01'
    df_long = df_long[df_long.identifier.str.len() == 9]
    df_long = df_long[df_long.price < sell_price_threshold]
    df_long = df_long[(df_long['trade_date'] > begindate)
                      & (df_long['trade_date'] < enddate)]
    df_long.loc[df_long.source == 'MANAGER', 'source'] = 'LMCG'

    # pretty plot
    plt.switch_backend('Agg')
    sb.set_style("whitegrid")
    sb.set_context("notebook")
    order = ['LMCG', 'BROKER', 'BVAL', 'IDC', 'MARKIT', 'PB', 'PRICESERVE',
             'PRICINGDIRECT', 'REUTERS', 'S&P']
    sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
    # First style entry highlights LMCG; the other nine are the externals.
    # NOTE(review): FacetGrid's `size` was renamed `height` in seaborn 0.9 —
    # left as `size` since the installed seaborn version is not visible here.
    hue_kws = {'s': [50] + [20] * 9,
               'marker': ["o"] + ["s"] * 9,
               'alpha': [1] + [.4] * 9}
    g_sell = sb.FacetGrid(df_long[df_long.dir == 'sell'], hue='source',
                          hue_kws=hue_kws, legend_out=True, aspect=2.1,
                          size=4, hue_order=order)
    g_sell.set(ylim=(0, 105), xlim=(0, 105))
    ax_sell = g_sell.map(plt.scatter, 'mark', 'price').add_legend()
    ax_sell.set_axis_labels('Mark', 'Sale Price')
    ax_sell.fig.savefig("/home/serenitas/edwin/backtest_sales.png")
    g_buy = sb.FacetGrid(df_long[df_long.dir == 'buy'], hue='source',
                         hue_kws=hue_kws, legend_out=True, aspect=2.1,
                         size=4, hue_order=order)
    g_buy.set(ylim=(0, 105), xlim=(0, 105))
    ax_buy = g_buy.map(plt.scatter, 'mark', 'price').add_legend()
    ax_buy.fig.savefig("/home/serenitas/edwin/backtest_buys.png")

    # fit all the models at once
    params = {}
    error = {}
    trade_PNL = {}
    trade_PNL_stat = {}
    for direction, df_temp in (df_long[df_long.difference < diff_threshold]
                               .groupby('dir')):
        # lambdas renamed d -> avoid shadowing the outer `df`
        params[direction] = (df_temp
                             .groupby('source')
                             .apply(lambda d: gls('price~mark', d).fit().params))
        # Select the column before aggregating so non-numeric columns do not
        # break mean/std on newer pandas (same values as the original).
        error[direction] = [df_temp.groupby('source')['difference'].mean(),
                            df_temp.groupby('source')['difference'].std()]
        # builtin sum() kept deliberately: unlike Series.sum() it propagates
        # NaN, matching the original behaviour.
        trade_PNL[direction] = df_temp.groupby(['date', 'source']).apply(
            lambda d: sum(d.principal_payment * d.difference)
            / sum(d.principal_payment))
        trade_PNL_stat[direction] = [
            trade_PNL[direction].groupby(level='source').mean(),
            trade_PNL[direction].groupby(level='source').std()]
    buychart = trade_PNL['buy'].unstack(-1).plot()