-rw-r--r--   python/externalmarksbackfill.py              3
-rw-r--r--   python/globeop_reports.py                   21
-rw-r--r--   python/mark_backtest_underpar.py           222
-rw-r--r--   python/notebooks/Valuation Backtest.ipynb  198
4 files changed, 313 insertions, 131 deletions
diff --git a/python/externalmarksbackfill.py b/python/externalmarksbackfill.py
index 73c56756..ae4339ca 100644
--- a/python/externalmarksbackfill.py
+++ b/python/externalmarksbackfill.py
@@ -97,7 +97,8 @@ def get_globs():
     return chain.from_iterable(globs)
 
 settings = {
-    'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170501.20170531.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "N"),
+    'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170601.20170630.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "N"),
+    'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170501.20170531.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
     'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170401.20170430.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
     'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170301.20170331.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
     'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20170201.20170228.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"),
diff --git a/python/globeop_reports.py b/python/globeop_reports.py
index cbd73673..a140672e 100644
--- a/python/globeop_reports.py
+++ b/python/globeop_reports.py
@@ -3,6 +3,7 @@ import os
 import pandas as pd
 import datetime
 from db import dbengine
+from pandas.tseries.offsets import MonthEnd
 import load_globeop_report as load_globeop
 
 def get_monthly_pnl():
@@ -21,12 +22,6 @@ def curr_port_PNL(date = datetime.date.today(), asset_class='Subprime'):
     df_all = df_positions.merge(df_pnl.groupby('identifier').sum().reset_index(), on=['identifier'])
     return df_all
 
-def check_valuation():
-    sql_string = "SELECT * FROM valuation_reports"
-    df_val = pd.read_sql_query(sql_string, dbengine('dawndb'), parse_dates=['periodenddate'])
-    nav = df_val[df_val.fund == 'SERCGMAST'].groupby('periodenddate')['endbooknav'].sum()
-    return nav.resample('M').last()
-
 def trade_performance():
     sql_string = "SELECT * FROM bonds"
     df_trades = pd.read_sql_query(sql_string, dbengine('dawndb'), parse_dates= ['lastupdate', 'trade_date','settle_date'])
@@ -35,9 +30,12 @@ def trade_performance():
 
     df_sell = df_trades[df_trades.buysell == False].groupby('identifier').last().reset_index()
     df_sell.identifier = df_sell.identifier.str[:9]
+    df_sell['trade_pnl_date'] = df_sell.trade_date + MonthEnd(0)
     df_buy = df_trades[df_trades.buysell == True].groupby('identifier').last().reset_index()
 
     df_all = df_sell.merge(df_pnl.groupby('identifier').sum().reset_index(), on=['identifier'])
+    df_all = df_all.merge(df_pnl.reset_index()[['date', 'identifier', 'mtdtotalbookpl']], left_on=['trade_pnl_date','identifier'], right_on=['date','identifier'], suffixes=('','_at_trade_month'))
+    df_all = df_all.drop(['date','trade_pnl_date'], axis = 1)
 
     #now build up the table
     g = df_buy.groupby('identifier').sum()
@@ -59,7 +57,16 @@ def trade_performance():
 
     return df_all
 
+def get_net_navs():
+    sql_string = "SELECT * FROM valuation_reports"
+    df_val = pd.read_sql_query(sql_string, dbengine('dawndb'), parse_dates=['periodenddate'])
+    nav = df_val[df_val.fund == 'SERCGMAST'].groupby('periodenddate')['endbooknav'].sum()
+    nav = nav.resample('M').last()
+    df = pd.read_csv('/home/serenitas/edwin/Python/subscription_fee_data.csv', parse_dates=['date'], index_col =['date'])
+    df.index = df.index.to_period('M').to_timestamp('M')
+    return df.join(nav)
+
 if __name__=='__main__':
-    nav = check_valuation()
+    nav = get_net_navs()
     df_pnl = trade_performance()
     df_curr_port = curr_port_PNL()
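The trade_performance() change above keys each sale's month-to-date P&L off its trade month: MonthEnd(0) rolls a trade date forward to its month end (dates already at month end stay put), so the new trade_pnl_date column can join against the month-end-indexed P&L table. A minimal sketch of that behavior, not part of this commit, using synthetic dates:

    import pandas as pd
    from pandas.tseries.offsets import MonthEnd

    trades = pd.DataFrame({'trade_date': pd.to_datetime(['2017-05-12', '2017-05-31'])})
    # MonthEnd(0) snaps forward to the month end; 2017-05-31 is unchanged.
    trades['trade_pnl_date'] = trades['trade_date'] + MonthEnd(0)
    print(trades)
    #   trade_date trade_pnl_date
    # 0 2017-05-12     2017-05-31
    # 1 2017-05-31     2017-05-31

get_net_navs() normalizes the subscription_fee_data.csv index to monthly for the same reason before joining it onto the resampled NAV series.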
diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py
index da65b5df..d34c7a4d 100644
--- a/python/mark_backtest_underpar.py
+++ b/python/mark_backtest_underpar.py
@@ -7,13 +7,14 @@
 import statsmodels.api as sm
 from statsmodels.formula.api import gls
 import seaborn as sb
+import globeop_reports as ops
+
 def get_mark_df(asset_class = 'Subprime'):
     #Filter marks > 1000 where the marks are weird...
     df_external_marks = pd.read_sql_query("select * from external_marks_mapped where mark < 1000" , dbengine('dawndb'))
-    date_range = df_external_marks.date.unique()
     df_trades = pd.DataFrame()
-    for date in date_range:
+    for date in df_external_marks.date.unique():
         df_temp= pd.read_sql_query("select description, identifier, usd_market_value/price*100 as curr_ntl from risk_positions(%s, %s) where price >0 and length(identifier) = 9 " , dbengine('dawndb'), params = [date, asset_class])
         df_temp['date'] = date
@@ -21,161 +22,136 @@
     df = df_trades.merge(df_external_marks).dropna()
     return df.set_index(['date','identifier'])
 
-def calc_mark_diff(df):
+def calc_mark_diff(df, sources= ['PRICESERVE', 'PRICINGDIRECT','BVAL','MARKIT','BROKER', 'REUTERS', 'S&P', 'PB', 'IDC']):
+    #All Sources (including manager...?!) - average, manager mark only, median, closest
     g = df.groupby(level = ['date','identifier'])
-    avg_mark = g.mean()
-    avg_mark = avg_mark.join(df[df.source == 'MANAGER']['mark'], rsuffix = '_manager')
-    avg_mark = avg_mark.join(g.median()['mark'], rsuffix = '_median_all')
-
-    def closest(x):
-        if x.mark.count() > 1:
-            x.dist = abs(x.mark - x.mark[x.source == 'MANAGER'])
-            mark = x.mark[x.dist == x.dist[x.dist>0].min()].iloc[0]
-        else:
-            mark = x.mark[0]
-        return mark
-
+    diff = g.mean()
+    diff = diff.join(df[df.source == 'MANAGER']['mark'], rsuffix = '_manager')
+    diff = diff.join(g.median()['mark'], rsuffix = '_median_all')
     temp = g.apply(closest)
     temp = temp.rename('mark_closest_all')
-    avg_mark = avg_mark.join(temp)
+    diff = diff.join(temp)
 
-    #Filtered Sources - choose PRICESERVE,PRICINGDIRECT,MARKIT,BVAL
-    #Filtered Sources - now really it is everything
-    sources = ['PRICESERVE', 'PRICINGDIRECT','BVAL','MARKIT','BROKER', 'REUTERS', 'S&P', 'PB', 'IDC']
+    #Filtered Sources - mean, median, remove max min
     df_filtered = df[df.source.isin(sources)]
     g1 = df_filtered.groupby(level = ['date','identifier'])
-
-    #Filtered Sources - average,median, remove max min, closest
-    avg_mark = avg_mark.join(g1.mean()['mark'], rsuffix = '_filtered')
-    avg_mark = avg_mark.join(g1.median()['mark'], rsuffix = '_filtered_median')
-
-    def remove_max_min(x):
-        if x.count() >= 4:
-            mark = (x.sum() - x.max() - x.min())/(x.count() -2)
-        else:
-            mark = x.mean()
-        return mark
-
-    avg_mark = avg_mark.join(g1.mark.apply(remove_max_min), rsuffix = '_filtered_no_max_min')
+    diff = diff.join(g1.mean()['mark'], rsuffix = '_filtered_mean')
+    diff = diff.join(g1.median()['mark'], rsuffix = '_filtered_median')
+    diff = diff.join(g1.mark.apply(remove_max_min), rsuffix = '_filtered_no_max_min')
 
     #calculate difference: negative means Serenitas marks higher
-    avg_mark1 = avg_mark.multiply(avg_mark.curr_ntl/100, axis = 'index')
-    del avg_mark1['curr_ntl']
-    diff = avg_mark1.apply(lambda x: (x-x.mark_manager), axis = 1)
-
-    #diff = difference to copy to performance number sheet
-    diff = diff.groupby(level = 'date').sum()
+    diff = diff.multiply(diff.curr_ntl/100, axis = 'index')
+    del diff['curr_ntl']
+    diff = diff.rename(columns = {'mark':'mark_mean_all'})
+    diff = diff.apply(lambda x: (x-x.mark_manager), axis = 1)
 
-    #count of each source
-    g2 = df.set_index('source', append=True).groupby(level = ['date','source'])
-    count = g2['mark'].count().unstack(-1)
+    return diff.groupby(level = 'date').sum()
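calc_mark_diff() now joins a trimmed mean alongside the plain mean and median: remove_max_min(), hoisted to module level later in this diff, drops one max and one min when an identifier has four or more marks and otherwise falls back to the mean. A toy run with synthetic marks (not part of the commit):

    import pandas as pd

    def remove_max_min(x):
        # drop one max and one min when there are 4+ marks, else plain mean
        if x.count() >= 4:
            return (x.sum() - x.max() - x.min())/(x.count() - 2)
        return x.mean()

    marks = pd.Series([60.0, 61.0, 62.0, 90.0])   # one outlier source at 90
    print(marks.mean())             # 68.25 -- pulled up by the outlier
    print(remove_max_min(marks))    # 61.5  -- max and min dropped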
 
-    #diff_by_source: now calculate the pricing by source
-    dftemp = df.drop('description', 1)
-    dftemp = dftemp.set_index(['source'], append=True).apply(lambda x: x.curr_ntl * x.mark/100, axis = 1)
-    dftemp = dftemp.groupby(level =['date','identifier','source']).mean()
-    dftemp = dftemp.unstack(-1).apply(lambda x: (x-x.MANAGER), axis = 1)
-    diff_by_source = dftemp.groupby(level = 'date').sum()
+def closest(x):
+    if x.mark.count() > 1:
+        x.dist = abs(x.mark - x.mark[x.source == 'MANAGER'])
+        return x.mark[x.dist == x.dist[x.dist>0].min()].iloc[0]
+    else:
+        return x.mark[0]
 
-    #Diff by MV per source.
-    diff_by_month = dftemp.groupby(level = 1).diff()
+def remove_max_min(x):
+    if x.count() >= 4:
+        return (x.sum() - x.max() - x.min())/(x.count() -2)
+    else:
+        return x.mean()
 
-    return(diff, diff_by_source, count)
+def diff_by_source(df):
+    #diff_by_source: input get_mark_df(), calculate the pricing by source
+    df = df.drop('description', 1)
+    df = df.set_index(['source'], append=True).apply(lambda x: x.curr_ntl * x.mark/100, axis = 1)
+    df = df.groupby(level =['date','identifier','source']).mean()
+    df = df.unstack(-1).apply(lambda x: (x-x.MANAGER), axis = 1)
+    return df.groupby(level = 'date').sum()
 
-def back_test():
-    df = pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
-                           parse_dates=['date'])
-    df = df[df.source.notnull()]
-    df_wide = (pd.pivot_table(df, 'mark', ['identifier', 'date'], 'source').
-               reset_index().
-               sort_values('date'))
+def diff_by_source_percentage(df):
+    df = diff_by_source(df)
+    df = df.join(ops.get_net_navs()['endbooknav'])
+    df = df.apply(lambda x: (x/x.endbooknav), axis = 1)
+    del df['endbooknav']
+    return df
 
-    df_trades = {}
-    df_trades['sell'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
-                                          "from bonds where buysell = 'f'"
-                                          , dbengine('dawndb'),
-                                          parse_dates=['trade_date'])
-    df_trades['buy'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
-                                         "from bonds where buysell = 't'"
-                                         , dbengine('dawndb'),
-                                         parse_dates=['trade_date'])
-    for direction, trades in df_trades.items():
-        df_trades[direction] = trades.sort_values('trade_date')
+def count_sources(df):
+    #input get_mark_df(), plot count of each source
+    g2 = df.set_index('source', append=True).groupby(level = ['date','source'])
+    # there are a good amount of Bloomberg duplicates, not a big deal but should clean them up
+    g2['mark'].count().unstack(-1).plot()
 
-    df_sell_wide = pd.merge_asof(df_trades['sell'], df_wide, left_on='trade_date', right_on='date', by='identifier')
-    df_wide_temp = df_wide.set_index('date').shift(periods = -1, freq= 'M', axis = 1).reset_index()
-    df_buy_wide = pd.merge_asof(df_trades['buy'], df_wide_temp
-                                , left_on='trade_date', right_on='date', by='identifier')
+def alt_navs():
+    navs = ops.get_net_navs()
+    df = calc_mark_diff(get_mark_df())
+    end_nav, beg_nav, returns, nav_100 = pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(index = df.index, columns=df.columns)
+    for col in df.columns:
+        end_nav[col] = df[col] + navs.endbooknav
+        beg_nav[col] = end_nav[col].shift(1) + navs.net_flow.shift(1)
+        beg_nav[col].iloc[0] = 12500000
+        returns[col] = (end_nav[col] - navs.incentive)/beg_nav[col] -1
+    for i, row in returns.dropna().reset_index().iterrows():
+        nav_100.iloc[i] = 100 if i == 0 else nav_100.iloc[i-1]
+        nav_100.iloc[i] = nav_100.iloc[i] * (1 + returns.iloc[i])
+    return returns, nav_100
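alt_navs() turns each valuation method's aggregate mark difference into an alternative NAV track: add the difference to the official end-of-month NAV, rebuild beginning NAV from the prior end NAV plus the prior month's net flow (seeded at 12,500,000), net the incentive fee out of the return, then compound a series rebased to 100. One month of that arithmetic as a sketch, with synthetic numbers (net_flow and incentive are assumed to come from subscription_fee_data.csv via get_net_navs()):

    # One month of the alt_navs() return chain (synthetic numbers).
    end_nav_prev = 12500000.0   # prior month's alternative end NAV (the seed in alt_navs)
    net_flow_prev = 0.0         # prior month's net subscriptions/redemptions
    mark_diff = 250000.0        # aggregate mark difference for this method, from calc_mark_diff
    official_nav = 12500000.0   # official end-of-month NAV (endbooknav)
    incentive = 25000.0         # incentive fee accrued this month

    end_nav = official_nav + mark_diff
    beg_nav = end_nav_prev + net_flow_prev
    monthly_return = (end_nav - incentive)/beg_nav - 1
    print('{:.2%}'.format(monthly_return))   # 1.80%

    nav_100 = 100 * (1 + monthly_return)     # the rebased track compounds month over month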
 
-    d_sell = {}
-    d_buy = {}
-    df_long = {}
+def annual_performance(nav_100):
+    perf = nav_100.groupby(pd.TimeGrouper(freq = 'A')).last()
+    perf_ann = perf/perf.shift(1) - 1
+    perf_ann['2013'] = perf['2013']/100-1
+    return perf_ann
 
-    for source, g in df.groupby('source'):
-        d_sell[source] = pd.merge_asof(df_trades['sell'], g, left_on='trade_date', right_on='date', by='identifier')
-        d_sell[source]['dir'] = 'sell'
-        d_buy[source] = pd.merge_asof(df_trades['buy'], g.set_index('date').shift(periods = -1, freq= 'BM', axis = 1).reset_index(), left_on='trade_date', right_on='date', by='identifier')
-        d_buy[source]['dir'] = 'buy'
+def alt_nav_impact():
+    navs = ops.get_net_navs()
+    df = calc_mark_diff(get_mark_df())
+    df = df.join(navs.endbooknav)
+    return df.iloc[-1]/df.iloc[-1]['endbooknav']
 
-    #add a column saying it is buy or sell
+def back_test(begindate = '2013-01-01', enddate = '2018-01-01', sell_price_threshold = 200):
+    df = pd.read_sql_query("SELECT * FROM external_marks_mapped WHERE source IS NOT NULL", dbengine('dawndb'),
+                           parse_dates=['date'])
+    df_wide = (pd.pivot_table(df, 'mark', ['identifier', 'date'], 'source').reset_index().sort_values('date'))
+    df_trades = pd.read_sql_query("select trade_date, identifier, price, buysell from bonds",
+                                  dbengine('dawndb'), parse_dates=['trade_date'])
+    df_trades.sort_values('trade_date', inplace = True)
+    df_sell_wide = pd.merge_asof(df_trades[df_trades.buysell == False], df_wide, left_on='trade_date', right_on='date', by='identifier').drop('date', 1)
 
-    df_long = pd.concat(d_sell, join='inner')
-    df_long = df_long.append(pd.concat(d_buy, join='inner'))
-    df_long = df_long.reset_index().dropna(subset=['price','mark'])
+    df_long = df_sell_wide.set_index(['trade_date','identifier','price','buysell']).stack()
+    df_long = df_long.reset_index().rename(columns={'level_4': 'source', 0:'mark'})
     df_long['difference'] = (df_long['price'] - df_long['mark'])/df_long['mark']
 
     #filtering
-    diff_threshold = 5
-    sell_price_threshold = 200
-    begindate = '2013-01-01'
-    enddate = '2018-01-01'
-
     df_long = df_long[df_long.identifier.str.len() == 9]
     df_long = df_long[df_long.price < sell_price_threshold]
     df_long = df_long[(df_long['trade_date'] > begindate) & (df_long['trade_date'] < enddate)]
     df_long.loc[df_long.source == 'MANAGER','source'] = 'LMCG'
 
-    #pretty plot
-    #df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)
+    return df_long
 
-    plt.switch_backend('Agg')
-    sb.set_style("whitegrid")
-    sb.set_context("notebook")
+def stats(df_long, diff_threshold = 5):
 
-    #plt.switch_backend('Qt4Agg')
-    order = ['LMCG','BROKER','BVAL','IDC','MARKIT','PB','PRICESERVE','PRICINGDIRECT','REUTERS','S&P']
-    sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
+    g = df_long[df_long.difference < diff_threshold].groupby('source')
 
-    g_sell = sb.FacetGrid(df_long[df_long.dir == 'sell'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
-    g_sell.set(ylim=(0, 105), xlim=(0, 105))
-    ax_sell = g_sell.map(plt.scatter, 'mark', 'price').add_legend()
-    ax_sell.set_axis_labels('Mark','Sale Price')
-    ax_sell.fig.savefig("/home/serenitas/edwin/backtest_sales.png")
+    #fit all the models at once
+    params = g.apply(lambda df: gls('price~mark', df).fit().params)
+    error = pd.DataFrame([g.difference.mean(),g.difference.std()])
+    error.index = ['average', 'standard deviation']
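stats() fits one gls('price~mark') model per source through the statsmodels formula API; an intercept near zero and a slope near one mean that source's marks track realized sale prices. A self-contained sketch with synthetic data (not part of the commit):

    import pandas as pd
    from statsmodels.formula.api import gls

    df = pd.DataFrame({'mark':  [50.0, 60.0, 70.0, 80.0],
                       'price': [50.2, 59.8, 70.2, 79.8]})
    fit = gls('price ~ mark', df).fit()
    print(fit.params)   # intercept ~0.5, slope ~0.99: these marks track sale prices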
 
-    g_buy = sb.FacetGrid(df_long[df_long.dir == 'buy'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
-    g_buy.set(ylim=(0, 105), xlim=(0, 105))
-    ax_buy = g_buy.map(plt.scatter, 'mark', 'price').add_legend()
+    return params, error
 
-    ax_buy.fig.savefig("/home/serenitas/edwin/backtest_buys.png")
+def pretty_plot(df_long):
 
-    params = {}
-    error = {}
-    trade_PNL = {}
-    trade_PNL_stat = {}
+    #plt.switch_backend('Agg')
+    sb.set_style("whitegrid")
+    sb.set_context("notebook")
 
-    #fit all the models at once
-    for direction, df_temp in df_long[(df_long.difference < diff_threshold)].groupby('dir'):
-        params[direction] = (df_temp.
-                             groupby('source').
-                             apply(lambda df: gls('price~mark', df).fit().params))
-        error[direction] = [df_temp.groupby('source').mean()['difference'],
-                            df_temp.groupby('source').std()['difference']]
-        trade_PNL[direction] = df_temp.groupby(['date', 'source']).apply(
-            lambda df: sum(df.principal_payment * df.difference)/
-            sum(df.principal_payment))
-        trade_PNL_stat[direction] = [trade_PNL[direction].groupby(level = 'source').mean(),
-                                     trade_PNL[direction].groupby(level = 'source').std()]
+    order = ['LMCG','BROKER','BVAL','IDC','MARKIT','PB','PRICESERVE','PRICINGDIRECT','REUTERS','S&P']
+    sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
 
-    buychart = trade_PNL['buy'].unstack(-1).plot()
+    grid = sb.FacetGrid(df_long, hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
+    grid.set(ylim=(0, 105), xlim=(0, 105))
+    ax = grid.map(plt.scatter, 'mark', 'price').add_legend()
+    ax.set_axis_labels('Mark', 'sale price')
+    ax.fig.savefig("/home/serenitas/edwin/backtest.png")
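The rewritten back_test() leans on pd.merge_asof for its core matching step: each sale is paired, per identifier, with the most recent marks at or before its trade date. A minimal sketch with synthetic data (both frames must be sorted on their time keys):

    import pandas as pd

    marks = pd.DataFrame({'date': pd.to_datetime(['2017-04-30', '2017-05-31']),
                          'identifier': ['ABC123456', 'ABC123456'],
                          'BVAL': [60.0, 62.0]})
    sales = pd.DataFrame({'trade_date': pd.to_datetime(['2017-05-15']),
                          'identifier': ['ABC123456'],
                          'price': [61.0]})
    out = pd.merge_asof(sales, marks, left_on='trade_date', right_on='date', by='identifier')
    print(out[['trade_date', 'BVAL', 'price']])   # picks up the 2017-04-30 mark (60.0)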
diff --git a/python/notebooks/Valuation Backtest.ipynb b/python/notebooks/Valuation Backtest.ipynb
new file mode 100644
index 00000000..88211db3
--- /dev/null
+++ b/python/notebooks/Valuation Backtest.ipynb
@@ -0,0 +1,198 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.ticker import FuncFormatter \n",
+    "from datetime import datetime\n",
+    "import pandas as pd\n",
+    "\n",
+    "import mark_backtest_underpar as mark\n",
+    "import globeop_reports as ops"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#exclude sell prices that are over 200\n",
+    "df_long = mark.back_test('2013-01-01', '2018-01-01', sell_price_threshold = 200)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#%matplotlib nbagg\n",
+    "%matplotlib inline\n",
+    "mark.pretty_plot(df_long)\n",
+    "#file saved in serenitas shared drive/edwin/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#exclude trades that are over 5x mark for purpose of regression\n",
+    "diff_threshold = 5\n",
+    "results = mark.stats(df_long, diff_threshold)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Regression Intercept\n",
+    "results[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Sale Difference\n",
+    "results[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Now Calculate alternate valuation methodologies\n",
+    "df = mark.get_mark_df()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "mark.count_sources(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#difference by source\n",
+    "nav = ops.get_net_navs()['endbooknav']\n",
+    "difference = mark.diff_by_source_percentage(df)\n",
+    "#difference.to_clipboard()\n",
+    "\n",
+    "#plot\n",
+    "ax = difference.plot(kind = 'bar', legend = True)\n",
+    "\n",
+    "visible = ax.xaxis.get_ticklabels()[::6]\n",
+    "for label in ax.xaxis.get_ticklabels():\n",
+    "    if label not in visible:\n",
+    "        label.set_visible(False)\n",
+    "    \n",
+    "ax.xaxis.set_major_formatter(plt.FixedFormatter(difference.index.to_series().dt.strftime(\"%b %Y\")))\n",
+    "ax.set_ylabel('NAV Impact vs. Fund Policy (%)')\n",
+    "vals = ax.get_yticks()\n",
+    "ax.set_yticklabels(['{:3.0f}%'.format(x*100) for x in vals])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = mark.alt_navs()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#%matplotlib nbagg\n",
+    "to_plot = ['mark_closest_all', 'mark_mean_all']\n",
+    "to_plot1 = ['mark_manager']\n",
+    "plot_df0 = results[1][to_plot]\n",
+    "plot_df1 = results[1][to_plot1]\n",
+    "\n",
+    "plot_df0 = plot_df0.rename(columns = {'mark_closest_all': 'Third-party mark closest to LMCG valuation', \\\n",
+    "                                      'mark_mean_all': 'Average of all third-party marks'})\n",
+    "plot_df1 = plot_df1.rename(columns = {'mark_manager': 'Marks per fund valuation policy'})\n",
+    "\n",
+    "ax = plot_df0.plot(figsize = [10, 3.5])\n",
+    "ax = plot_df1.plot(marker = 'o', ax = ax)\n",
+    "plt.rcParams[\"font.family\"] = \"sans-serif\"\n",
+    "ax.set_xlabel('')\n",
+    "ax.set_ylabel('NAV', weight = 'bold')\n",
+    "ax.set_title('Fund Return Using Different Valuation Methods', weight = 'bold')\n",
+    "lgd = ax.legend(loc='upper center', bbox_to_anchor=(0.5, -.1), shadow=True, ncol=3)\n",
+    "ax.figure.savefig(\"/home/serenitas/edwin/PythonGraphs/Valuation_1.png\", bbox_extra_artists=(lgd,), bbox_inches='tight')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mark.annual_performance(results[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.DataFrame(mark.alt_nav_impact())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
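The notebook imports FuncFormatter but ends up formatting the percent axis by hand with get_yticks()/set_yticklabels(), which freezes the labels and goes stale if the view limits change. A sketch of the equivalent formatter-based approach (synthetic data, not part of the commit):

    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter

    fig, ax = plt.subplots()
    ax.plot([1, 2, 3], [0.01, -0.02, 0.015])
    # Labels are recomputed from tick values on every draw, so they survive rescaling.
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, pos: '{:3.0f}%'.format(y * 100)))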