-rw-r--r--  python/externalmarksbackfill.py   |   9
-rw-r--r--  python/mark_backtest_underpar.py  | 208
2 files changed, 141 insertions(+), 76 deletions(-)
diff --git a/python/externalmarksbackfill.py b/python/externalmarksbackfill.py
index 64c8c81e..73c56756 100644
--- a/python/externalmarksbackfill.py
+++ b/python/externalmarksbackfill.py
@@ -10,7 +10,7 @@ from pickle import dumps
 from sqlalchemy import create_engine
 from itertools import chain
 from glob import glob, iglob
-from pandas.tseries.offsets import MonthEnd
+from pandas.tseries.offsets import MonthEnd, BDay
 
 def runAllFill():
     for f in get_globs():
@@ -20,7 +20,8 @@ def runAllFill():
 
 def runSingleFill(f):
     range_name, sheet_name, done = settings[os.path.basename(f)]
-    markdate = pd.Timestamp(os.path.dirname(f).rsplit(os.path.sep, 1)[-1]) + MonthEnd()
+    markdate = pd.Timestamp(os.path.dirname(f).rsplit(os.path.sep, 1)[-1]) - 5*BDay() + MonthEnd()
+    # change /usr/lib/python3.6/site-packages/xlrd/xlsx.py line 609 to check for ":" in ref as well; otherwise a single merged cell bombs
     marks = pd.read_excel(f, sheet_name, skiprows=13, parse_cols=range_name)
 
     df = pd.DataFrame()
@@ -96,6 +97,10 @@ def get_globs():
     return chain.from_iterable(globs)
 
 settings = {
+    'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170501.20170531.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "N"),
+    'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170401.20170430.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
+    'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170301.20170331.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
+    'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170201.20170228.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
     'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170101.20170131.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
     'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20161201.20161231.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
     'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20161101.20161130.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
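Note on the markdate change above: subtracting five business days before snapping to month-end keeps folders already dated at a month end on that same month (a bare `+ MonthEnd()` on an exact month-end date rolls forward to the next month), while still mapping early-next-month folder dates back to the prior month's marks. A minimal sketch of the arithmetic; the folder dates 20170602 and 20170531 are hypothetical examples, and BDay only skips weekends, not holidays:

    import pandas as pd
    from pandas.tseries.offsets import MonthEnd, BDay

    # 2017-06-02 (Fri) - 5 business days = 2017-05-26; + MonthEnd() -> 2017-05-31
    early_june = pd.Timestamp('20170602') - 5*BDay() + MonthEnd()
    # 2017-05-31 (Wed) - 5 business days = 2017-05-24; + MonthEnd() -> 2017-05-31
    month_end = pd.Timestamp('20170531') - 5*BDay() + MonthEnd()
    assert early_june == month_end == pd.Timestamp('2017-05-31')
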
diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py
index b17cc26f..a125f15a 100644
--- a/python/mark_backtest_underpar.py
+++ b/python/mark_backtest_underpar.py
@@ -7,96 +7,156 @@ import statsmodels.api as sm
 from statsmodels.formula.api import gls
 import seaborn as sb
 
+def calc_mark_diff(asset_class = 'Subprime'):
+    # Filter out marks > 1000, where the marks are weird...
+    df_external_marks = pd.read_sql_query("select * from external_marks_mapped where mark < 1000"
+                                          , dbengine('dawndb'))
+    date_range = df_external_marks.date.unique()
+    df_trades = pd.DataFrame()
+    for date in date_range:
+        df_temp = pd.read_sql_query("select description, identifier, usd_market_value/price*100 as curr_ntl from risk_positions(%s, %s) where price > 0 and length(identifier) = 9"
+                                    , dbengine('dawndb'), params = [date, asset_class])
+        df_temp['date'] = date
+        df_trades = df_trades.append(df_temp)
+    df = df_trades.merge(df_external_marks).dropna()
+    df = df.set_index(['date', 'identifier'])
-df = pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
-                       parse_dates=['date'])
-df = df[df.source.notnull()]
-df_wide = (pd.pivot_table(df, 'mark', ['identifier', 'date'], 'source').
-           reset_index().
-           sort_values('date'))
+
+    # All sources - average, manager mark only, median, closest
+    g = df.groupby(level = ['date', 'identifier'])
+    avg_mark = g.mean()
+    avg_mark = avg_mark.join(df[df.source == 'MANAGER']['mark'], rsuffix = '_manager')
+    avg_mark = avg_mark.join(g.median()['mark'], rsuffix = '_median_all')
-df_trades = {}
-df_trades['sell'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
-                                      "from bonds where buysell = 'f'"
-                                      , dbengine('dawndb'),
-                                      parse_dates=['trade_date'])
-df_trades['buy'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
-                                     "from bonds where buysell = 't'"
-                                     , dbengine('dawndb'),
-                                     parse_dates=['trade_date'])
-for direction, trades in df_trades.items():
-    df_trades[direction] = trades.sort_values('trade_date')
+
+    def closest(x):
+        if x.mark.count() > 1:
+            x.dist = abs(x.mark - x.mark[x.source == 'MANAGER'])
+            mark = x.mark[x.dist == x.dist[x.dist > 0].min()].iloc[0]
+        else:
+            mark = x.mark[0]
+        return mark
-df_sell_wide = pd.merge_asof(df_trades['sell'], df_wide, left_on='trade_date', right_on='date', by='identifier')
-df_wide_temp = df_wide.set_index('date').shift(periods = -1, freq= 'M', axis = 1).reset_index()
-df_buy_wide = pd.merge_asof(df_trades['buy'], df_wide_temp
-                            , left_on='trade_date', right_on='date', by='identifier')
+
+    temp = g.apply(closest)
+    temp = temp.rename('mark_closest_all')
+    avg_mark = avg_mark.join(temp)
-d_sell = {}
-d_buy = {}
-df_long = {}
+
+    # Filtered sources - choose PRICESERVE, PRICINGDIRECT, MARKIT, BVAL
+    sources = ['PRICESERVE', 'PRICINGDIRECT', 'BVAL', 'MARKIT', 'BROKER']
+    df_filtered = df[df.source.isin(sources)]
+    g1 = df_filtered.groupby(level = ['date', 'identifier'])
-for source, g in df.groupby('source'):
-    d_sell[source] = pd.merge_asof(df_trades['sell'], g, left_on='trade_date', right_on='date', by='identifier')
-    d_sell[source]['dir'] = 'sell'
-    d_buy[source] = pd.merge_asof(df_trades['buy'], g.set_index('date').shift(periods = -1, freq= 'BM', axis = 1).reset_index(), left_on='trade_date', right_on='date', by='identifier')
-    d_buy[source]['dir'] = 'buy'
+
+    # Filtered sources - average, median, remove max/min, closest
+    avg_mark = avg_mark.join(g1.mean()['mark'], rsuffix = '_filtered')
+    avg_mark = avg_mark.join(g1.median()['mark'], rsuffix = '_filtered_median')
-#add a column saying it is buy or sell
+
+    def remove_max_min(x):
+        if x.count() >= 4:
+            mark = (x.sum() - x.max() - x.min())/(x.count() - 2)
+        else:
+            mark = x.mean()
+        return mark
-df_long = pd.concat(d_sell, join='inner')
-df_long = df_long.append(pd.concat(d_buy, join='inner'))
-df_long = df_long.reset_index().dropna(subset=['price','mark'])
-df_long['difference'] = (df_long['price'] - df_long['mark'])/df_long['mark']
+
+    avg_mark = avg_mark.join(g1.mark.apply(remove_max_min), rsuffix = '_filtered_no_max_min')
-#filtering
-diff_threshold = 5
-begindate = '2013-01-01'
-enddate = '2018-01-01'
+
+    # calculate difference: negative means Serenitas marks higher
+    avg_mark1 = avg_mark.multiply(avg_mark.curr_ntl/100, axis = 'index')
+    del avg_mark1['curr_ntl']
+    diff = avg_mark1.apply(lambda x: (x - x.mark_manager), axis = 1)
+    diff = diff.groupby(level = 'date').sum()
-df_long = df_long[df_long.identifier.str.len() == 9]
-df_long = df_long[(df_long['trade_date'] > begindate) & (df_long['trade_date'] < enddate)]
+
+    diff.to_clipboard()
-#difference['Std Dev'] = df_long[df_long<filter].std()
-#pretty plot
-#df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)
-plt.switch_backend('Agg')
-sb.set_style("whitegrid")
-sb.set_context("notebook")
+
+def back_test():
+    df = pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
+                           parse_dates=['date'])
+    df = df[df.source.notnull()]
+    df_wide = (pd.pivot_table(df, 'mark', ['identifier', 'date'], 'source').
+               reset_index().
+               sort_values('date'))
-#plt.switch_backend('Qt4Agg')
-order = ['MANAGER','BROKER','BVAL','IDC','MARKIT','PB','PRICESERVE','PRICINGDIRECT','REUTERS','S&P']
-sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
+
+    df_trades = {}
+    df_trades['sell'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
+                                          "from bonds where buysell = 'f'"
+                                          , dbengine('dawndb'),
+                                          parse_dates=['trade_date'])
+    df_trades['buy'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
+                                         "from bonds where buysell = 't'"
+                                         , dbengine('dawndb'),
+                                         parse_dates=['trade_date'])
+    for direction, trades in df_trades.items():
+        df_trades[direction] = trades.sort_values('trade_date')
-g_sell = sb.FacetGrid(df_long[df_long.dir == 'sell'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
-g_sell.set(ylim=(0, 105), xlim=(0, 105))
-ax_sell = g_sell.map(plt.scatter, 'mark', 'price').add_legend()
+
+    df_sell_wide = pd.merge_asof(df_trades['sell'], df_wide, left_on='trade_date', right_on='date', by='identifier')
+    df_wide_temp = df_wide.set_index('date').shift(periods = -1, freq= 'M', axis = 1).reset_index()
+    df_buy_wide = pd.merge_asof(df_trades['buy'], df_wide_temp
+                                , left_on='trade_date', right_on='date', by='identifier')
-g_buy = sb.FacetGrid(df_long[df_long.dir == 'buy'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
-g_buy.set(ylim=(0, 105), xlim=(0, 105))
-ax_buy = g_buy.map(plt.scatter, 'mark', 'price').add_legend()
+
+    d_sell = {}
+    d_buy = {}
+    df_long = {}
-ax_sell.fig.savefig("/home/serenitas/edwin/backtest_sales.png")
-ax_buy.fig.savefig("/home/serenitas/edwin/backtest_buys.png")
+
+    for source, g in df.groupby('source'):
+        d_sell[source] = pd.merge_asof(df_trades['sell'], g, left_on='trade_date', right_on='date', by='identifier')
+        d_sell[source]['dir'] = 'sell'
+        d_buy[source] = pd.merge_asof(df_trades['buy'], g.set_index('date').shift(periods = -1, freq= 'BM', axis = 1).reset_index(), left_on='trade_date', right_on='date', by='identifier')
+        d_buy[source]['dir'] = 'buy'
-params = {}
-error = {}
-trade_PNL = {}
-trade_PNL_stat = {}
+
+    # add a column saying whether it is a buy or a sell
-#fit all the models at once
-for direction, df_temp in df_long[(df_long.difference < diff_threshold)].groupby('dir'):
-    params[direction] = (df_temp.
-                         groupby('source').
-                         apply(lambda df: gls('price~mark', df).fit().params))
-    error[direction] = [df_temp.groupby('source').mean()['difference'],
-                        df_temp.groupby('source').std()['difference']]
-    trade_PNL[direction] = df_temp.groupby(['date', 'source']).apply(
-        lambda df: sum(df.principal_payment * df.difference)/
-        sum(df.principal_payment))
-    trade_PNL_stat[direction] = [trade_PNL[direction].groupby(level = 'source').mean(),
-                                 trade_PNL[direction].groupby(level = 'source').std()]
+
+    df_long = pd.concat(d_sell, join='inner')
+    df_long = df_long.append(pd.concat(d_buy, join='inner'))
+    df_long = df_long.reset_index().dropna(subset=['price', 'mark'])
+    df_long['difference'] = (df_long['price'] - df_long['mark'])/df_long['mark']
-buychart = trade_PNL['buy'].unstack(-1).plot()
+
+    # filtering
+    diff_threshold = 5
+    sell_price_threshold = 200
+    begindate = '2013-01-01'
+    enddate = '2018-01-01'
+
+    df_long = df_long[df_long.identifier.str.len() == 9]
+    df_long = df_long[df_long.price < sell_price_threshold]
+    df_long = df_long[(df_long['trade_date'] > begindate) & (df_long['trade_date'] < enddate)]
+
+    # pretty plot
+    #df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)
+
+    plt.switch_backend('Agg')
+    sb.set_style("whitegrid")
+    sb.set_context("notebook")
+
+    #plt.switch_backend('Qt4Agg')
+    order = ['MANAGER','BROKER','BVAL','IDC','MARKIT','PB','PRICESERVE','PRICINGDIRECT','REUTERS','S&P']
+    sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
+
+    g_sell = sb.FacetGrid(df_long[df_long.dir == 'sell'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
+    g_sell.set(ylim=(0, 105), xlim=(0, 105))
+    ax_sell = g_sell.map(plt.scatter, 'mark', 'price').add_legend()
+
+    g_buy = sb.FacetGrid(df_long[df_long.dir == 'buy'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
+    g_buy.set(ylim=(0, 105), xlim=(0, 105))
+    ax_buy = g_buy.map(plt.scatter, 'mark', 'price').add_legend()
+
+    ax_sell.fig.savefig("/home/serenitas/edwin/backtest_sales.png")
+    ax_buy.fig.savefig("/home/serenitas/edwin/backtest_buys.png")
+
+    params = {}
+    error = {}
+    trade_PNL = {}
+    trade_PNL_stat = {}
+
+    # fit all the models at once
+    for direction, df_temp in df_long[(df_long.difference < diff_threshold)].groupby('dir'):
+        params[direction] = (df_temp.
+                             groupby('source').
+                             apply(lambda df: gls('price~mark', df).fit().params))
+        error[direction] = [df_temp.groupby('source').mean()['difference'],
+                            df_temp.groupby('source').std()['difference']]
+        trade_PNL[direction] = df_temp.groupby(['date', 'source']).apply(
+            lambda df: sum(df.principal_payment * df.difference)/
+            sum(df.principal_payment))
+        trade_PNL_stat[direction] = [trade_PNL[direction].groupby(level = 'source').mean(),
+                                     trade_PNL[direction].groupby(level = 'source').std()]
+
+    buychart = trade_PNL['buy'].unstack(-1).plot()
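Two patterns in this rewrite are worth noting. The remove_max_min aggregator in calc_mark_diff is an olympic-style trimmed mean: with four or more quotes it drops the single highest and lowest before averaging, otherwise it falls back to the plain mean. A self-contained sketch with made-up identifiers and marks:

    import pandas as pd

    def remove_max_min(x):
        # Enough quotes: drop one max and one min, then average the rest.
        if x.count() >= 4:
            return (x.sum() - x.max() - x.min()) / (x.count() - 2)
        # Too few quotes: fall back to the plain mean.
        return x.mean()

    marks = pd.DataFrame({
        'identifier': ['AAA111AA1']*4 + ['BBB222BB2']*2,
        'mark':       [98.0, 99.0, 70.0, 101.0, 95.0, 96.0],
    })
    print(marks.groupby('identifier')['mark'].apply(remove_max_min))
    # AAA111AA1: (98 + 99 + 70 + 101 - 101 - 70)/2 = 98.5 (outliers dropped)
    # BBB222BB2: only two quotes, so plain mean = 95.5

back_test, meanwhile, leans on pandas.merge_asof, a backward as-of join keyed per identifier: each trade picks up the most recent mark on or before its trade date (for buys, the mark dates are first shifted forward a month), and both frames must be sorted on their date keys. A toy illustration with hypothetical data:

    import pandas as pd

    trades = pd.DataFrame({
        'trade_date': pd.to_datetime(['2017-02-10', '2017-03-05']),
        'identifier': ['AAA111AA1', 'AAA111AA1'],
        'price':      [97.0, 99.0],
    })
    marks = pd.DataFrame({
        'date':       pd.to_datetime(['2017-01-31', '2017-02-28']),
        'identifier': ['AAA111AA1', 'AAA111AA1'],
        'mark':       [96.5, 98.0],
    })
    # The Feb 10 trade picks up the Jan 31 mark; the Mar 5 trade, the Feb 28 mark.
    print(pd.merge_asof(trades, marks, left_on='trade_date',
                        right_on='date', by='identifier'))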