import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sb
from statsmodels.formula.api import gls

from db import dbengine


def calc_mark_diff(asset_class='Subprime'):
    # Filter out marks above 1000, which look erroneous.
    df_external_marks = pd.read_sql_query(
        "select * from external_marks_mapped where mark < 1000",
        dbengine('dawndb'))
    date_range = df_external_marks.date.unique()

    # Pull the current notional for each position on every marking date.
    frames = []
    for date in date_range:
        df_temp = pd.read_sql_query(
            "select description, identifier, usd_market_value/price*100 as curr_ntl "
            "from risk_positions(%s, %s) "
            "where price > 0 and length(identifier) = 9",
            dbengine('dawndb'), params=[date, asset_class])
        df_temp['date'] = date
        frames.append(df_temp)
    df_trades = pd.concat(frames, ignore_index=True)

    df = df_trades.merge(df_external_marks).dropna()
    df = df.set_index(['date', 'identifier'])

    # All sources - average, manager mark only, median, closest.
    g = df.groupby(level=['date', 'identifier'])
    # Non-numeric columns (description, source) are dropped from the averages.
    avg_mark = g.mean(numeric_only=True)
    avg_mark = avg_mark.join(df[df.source == 'MANAGER']['mark'], rsuffix='_manager')
    avg_mark = avg_mark.join(g['mark'].median(), rsuffix='_median_all')

    def closest(x):
        # Pick the external mark closest to, but different from, the manager mark.
        if x.mark.count() > 1:
            manager_mark = x.mark[x.source == 'MANAGER'].iloc[0]
            dist = (x.mark - manager_mark).abs()
            mark = x.mark[dist == dist[dist > 0].min()].iloc[0]
        else:
            mark = x.mark.iloc[0]
        return mark

    temp = g.apply(closest)
    temp = temp.rename('mark_closest_all')
    avg_mark = avg_mark.join(temp)

    # Filtered sources - keep PRICESERVE, PRICINGDIRECT, BVAL, MARKIT, BROKER.
    sources = ['PRICESERVE', 'PRICINGDIRECT', 'BVAL', 'MARKIT', 'BROKER']
    df_filtered = df[df.source.isin(sources)]
    g1 = df_filtered.groupby(level=['date', 'identifier'])

    # Filtered sources - average, median, trimmed mean (drop max/min).
    avg_mark = avg_mark.join(g1['mark'].mean(), rsuffix='_filtered')
    avg_mark = avg_mark.join(g1['mark'].median(), rsuffix='_filtered_median')

    def remove_max_min(x):
        # Trimmed mean: with at least four quotes, drop the highest and lowest.
        if x.count() >= 4:
            mark = (x.sum() - x.max() - x.min()) / (x.count() - 2)
        else:
            mark = x.mean()
        return mark

    avg_mark = avg_mark.join(g1['mark'].apply(remove_max_min),
                             rsuffix='_filtered_no_max_min')

    # Calculate the difference in market-value terms:
    # negative means Serenitas marks higher.
    avg_mark1 = avg_mark.multiply(avg_mark.curr_ntl / 100, axis='index')
    del avg_mark1['curr_ntl']
    diff = avg_mark1.apply(lambda x: x - x.mark_manager, axis=1)
    diff = diff.groupby(level='date').sum()
    diff.to_clipboard()
    return diff
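
# A minimal, self-contained sketch (synthetic data, no database connection) of the
# per-bond aggregations used in calc_mark_diff: the all-source mean/median and the
# "remove max/min" trimmed mean.  The identifier and quotes below are made up.
def _demo_mark_aggregation():
    quotes = pd.DataFrame({
        'identifier': ['DEMO12345'] * 4,
        'source': ['MANAGER', 'BVAL', 'MARKIT', 'PRICESERVE'],
        'mark': [99.0, 98.5, 100.0, 103.0],
    })
    g = quotes.groupby('identifier')['mark']
    return pd.DataFrame({
        'mark_mean': g.mean(),
        'mark_median': g.median(),
        # Same rule as remove_max_min: with four or more quotes, drop the single
        # highest and lowest before averaging.
        'mark_no_max_min': g.apply(
            lambda x: (x.sum() - x.max() - x.min()) / (x.count() - 2)
            if x.count() >= 4 else x.mean()),
    })
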
def back_test():
    df = pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
                           parse_dates=['date'])
    df = df[df.source.notnull()]
    # merge_asof below requires the right-hand frames to be sorted on 'date'.
    df = df.sort_values('date')
    df_wide = (pd.pivot_table(df, values='mark', index=['identifier', 'date'],
                              columns='source').
               reset_index().
               sort_values('date'))

    df_trades = {}
    df_trades['sell'] = pd.read_sql_query(
        "select trade_date, identifier, faceamount, principal_payment, price "
        "from bonds where buysell = 'f'",
        dbengine('dawndb'), parse_dates=['trade_date'])
    df_trades['buy'] = pd.read_sql_query(
        "select trade_date, identifier, faceamount, principal_payment, price "
        "from bonds where buysell = 't'",
        dbengine('dawndb'), parse_dates=['trade_date'])
    for direction, trades in df_trades.items():
        df_trades[direction] = trades.sort_values('trade_date')

    # Sells are matched to the latest mark on or before the trade date; buys are
    # matched to the marks shifted back one month, i.e. the following month's mark.
    df_sell_wide = pd.merge_asof(df_trades['sell'], df_wide,
                                 left_on='trade_date', right_on='date',
                                 by='identifier')
    df_wide_temp = df_wide.set_index('date').shift(periods=-1, freq='M').reset_index()
    df_buy_wide = pd.merge_asof(df_trades['buy'], df_wide_temp,
                                left_on='trade_date', right_on='date',
                                by='identifier')

    d_sell = {}
    d_buy = {}
    for source, g in df.groupby('source'):
        d_sell[source] = pd.merge_asof(df_trades['sell'], g,
                                       left_on='trade_date', right_on='date',
                                       by='identifier')
        d_sell[source]['dir'] = 'sell'
        d_buy[source] = pd.merge_asof(
            df_trades['buy'],
            g.set_index('date').shift(periods=-1, freq='BM').reset_index(),
            left_on='trade_date', right_on='date', by='identifier')
        d_buy[source]['dir'] = 'buy'  # add a column saying whether it is a buy or a sell
    df_long = pd.concat([pd.concat(d_sell, join='inner'),
                         pd.concat(d_buy, join='inner')])
    df_long = df_long.reset_index().dropna(subset=['price', 'mark'])
    df_long['difference'] = (df_long['price'] - df_long['mark']) / df_long['mark']

    # filtering
    diff_threshold = 5
    sell_price_threshold = 200
    begindate = '2013-01-01'
    enddate = '2018-01-01'
    df_long = df_long[df_long.identifier.str.len() == 9]
    df_long = df_long[df_long.price < sell_price_threshold]
    df_long = df_long[(df_long['trade_date'] > begindate) &
                      (df_long['trade_date'] < enddate)]

    # pretty plot
    #df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)
    plt.switch_backend('Agg')
    sb.set_style("whitegrid")
    sb.set_context("notebook")
    #plt.switch_backend('Qt4Agg')
    order = ['MANAGER', 'BROKER', 'BVAL', 'IDC', 'MARKIT', 'PB',
             'PRICESERVE', 'PRICINGDIRECT', 'REUTERS', 'S&P']
    sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
    g_sell = sb.FacetGrid(df_long[df_long.dir == 'sell'], hue='source',
                          hue_kws={'s': [50] + [20] * 9,
                                   'marker': ["o"] + ["s"] * 9,
                                   'alpha': [1] + [.4] * 9},
                          legend_out=True, aspect=2.1, size=4, hue_order=order)
    g_sell.set(ylim=(0, 105), xlim=(0, 105))
    ax_sell = g_sell.map(plt.scatter, 'mark', 'price').add_legend()
    g_buy = sb.FacetGrid(df_long[df_long.dir == 'buy'], hue='source',
                         hue_kws={'s': [50] + [20] * 9,
                                  'marker': ["o"] + ["s"] * 9,
                                  'alpha': [1] + [.4] * 9},
                         legend_out=True, aspect=2.1, size=4, hue_order=order)
    g_buy.set(ylim=(0, 105), xlim=(0, 105))
    ax_buy = g_buy.map(plt.scatter, 'mark', 'price').add_legend()
    ax_sell.fig.savefig("/home/serenitas/edwin/backtest_sales.png")
    ax_buy.fig.savefig("/home/serenitas/edwin/backtest_buys.png")
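    # The remainder of the function quantifies, per pricing source and per trade
    # direction, how well external marks anticipated realised trade prices: a GLS
    # regression of price on mark, the mean/std of the relative pricing error, and
    # a principal-weighted PnL series by marking date.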
    # fit all the models at once
    params = {}
    error = {}
    trade_PNL = {}
    trade_PNL_stat = {}
    for direction, df_temp in df_long[df_long.difference < diff_threshold].groupby('dir'):
        params[direction] = (df_temp.
                             groupby('source').
                             apply(lambda df: gls('price ~ mark', df).fit().params))
        error[direction] = [df_temp.groupby('source')['difference'].mean(),
                            df_temp.groupby('source')['difference'].std()]
        # Principal-weighted trade PnL versus each source's mark, per marking date.
        trade_PNL[direction] = df_temp.groupby(['date', 'source']).apply(
            lambda df: sum(df.principal_payment * df.difference) /
            sum(df.principal_payment))
        trade_PNL_stat[direction] = [trade_PNL[direction].groupby(level='source').mean(),
                                     trade_PNL[direction].groupby(level='source').std()]
    buychart = trade_PNL['buy'].unstack(-1).plot()
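

# Example entry point (a sketch): assumes a reachable 'dawndb' connection via
# db.dbengine and write access to the hard-coded image paths used in back_test().
if __name__ == '__main__':
    calc_mark_diff('Subprime')
    back_test()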