-rw-r--r--  python/mark_backtest_backfill.py           23
-rw-r--r--  python/mark_backtest_underpar.py          102
-rw-r--r--  python/notebooks/Valuation Backtest.ipynb   60
3 files changed, 108 insertions, 77 deletions
diff --git a/python/mark_backtest_backfill.py b/python/mark_backtest_backfill.py
index 82187f9c..5a35edf1 100644
--- a/python/mark_backtest_backfill.py
+++ b/python/mark_backtest_backfill.py
@@ -3,9 +3,9 @@ import os
 import re
 import psycopg2
 import datetime
-import bbg_helpers
+import serenitas.analytics.bbg_helpers
 
-from utils.db import dbconn, serenitas_engine, dawn_engine, DataError
+from serenitas.utils.db import dbconn, serenitas_engine, dawn_engine, DataError
 from pickle import dumps
 from sqlalchemy import create_engine
 from itertools import chain
@@ -28,7 +28,9 @@ def runSingleFill(f):
         + MonthEnd()
     )
     # change /usr/lib/python3.6/site-packages/xlrd/xlsx.py line 609 to check for ":" in ref as well. Otherwise single merged cells bombs
-    marks = pd.read_excel(f, sheet_name, skiprows=11, usecols=range_name)
+    marks = pd.read_excel(
+        f, sheet_name, skiprows=11, usecols=range_name, engine="openpyxl"
+    )
     marks = marks.rename(columns=lambda x: x.replace(".1", ""))
 
     df = pd.DataFrame()
@@ -123,6 +125,21 @@ def get_globs():
 
 
 settings = {
+    "ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20201201.20201231.Draft.xlsx": (
+        "LA:MD",
+        "Securities Valuation Details",
+        "Y",
+    ),
+    "ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20201101.20201130.Draft.xlsx": (
+        "JA:JX",
+        "Securities Valuation Details",
+        "Y",
+    ),
+    "ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20201001.20201031.Draft.xlsx": (
+        "JA:JX",
+        "Securities Valuation Details",
+        "Y",
+    ),
     "ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20200901.20200930.Draft.xlsx": (
         "JA:JX",
         "Securities Valuation Details",
diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py
index 821d86a7..dcf2ba0d 100644
--- a/python/mark_backtest_underpar.py
+++ b/python/mark_backtest_underpar.py
@@ -8,7 +8,7 @@ import datetime
 import globeop_reports as ops
 from statsmodels.formula.api import gls
 
-from utils.db import serenitas_engine, dawn_engine, DataError
+from serenitas.utils.db import serenitas_engine, dawn_engine, DataError
 
 
 def get_mark_df(asset_class="Subprime"):
@@ -18,7 +18,7 @@ def get_mark_df(asset_class="Subprime"):
         "select * from external_marks_mapped where mark < 3000", dawn_engine
     )
     df_trades = pd.DataFrame()
-    for date in df_external_marks.date.unique():
+    for date in df_external_marks["date"].unique():
         df_temp = pd.read_sql_query(
             "select description, identifier, "
             "usd_market_value/price*100 as curr_ntl "
@@ -33,62 +33,56 @@ def get_mark_df(asset_class="Subprime"):
     return df.set_index(["date", "identifier"])
 
 
-def calc_mark_diff(df=None, sources=None):
-
-    if sources is None:
-        sources = [
-            "PRICESERVE",
-            "PRICINGDIRECT",
-            "BVAL",
-            "MARKIT",
-            "BROKER",
-            "REUTERS",
-            "S&P",
-            "IDC",
-        ]
-    if df is None:
-        df = get_mark_df()
-    # All Sources (including manager...?!) - average, manager mark only, median, closest
-    g = df.groupby(level=["date", "identifier"])
-    diff = g.mean()
-    diff = diff.join(df[df.source == "MANAGER"]["mark"], rsuffix="_manager")
-    diff = diff.join(g.median()["mark"], rsuffix="_median_all")
-    temp = g.apply(closest)
-    temp = temp.rename("mark_closest_all")
-    diff = diff.join(temp)
-
-    # Filtered Sources - mean, median, remove max min
-    df_filtered = df[df.source.isin(sources)]
-    g1 = df_filtered.groupby(level=["date", "identifier"])
-    diff = diff.join(g1.mean()["mark"], rsuffix="_filtered_mean")
-    diff = diff.join(g1.median()["mark"], rsuffix="_filtered_median")
-    diff = diff.join(g1.mark.apply(remove_max_min), rsuffix="_filtered_no_max_min")
-
-    # calculate difference: negative means Serenitas marks higher
-    diff = diff.multiply(diff.curr_ntl / 100, axis="index")
-    del diff["curr_ntl"]
-    diff = diff.rename(columns={"mark": "mark_mean_all"})
-    diff = diff.apply(lambda x: (x - x.mark_manager), axis=1)
+def calc_mark_diff(df, source_remove=["PB"]):
+    # use get_mark_df
+    df = df[~df.source.isin(source_remove)]
+    df = (
+        df.set_index("source", append=True)
+        .groupby(level=["date", "identifier", "source"])
+        .mean()
+    )
+    marks = df["mark"].unstack(-1).dropna(subset=["MANAGER"])
 
-    return diff.groupby(level="date").sum()
-
+    def remove_max_min(x):
+        x = x.dropna()
+        return (
+            (x.sum() - x.max() - x.min()) / (x.count() - 2)
+            if x.count() >= 4
+            else x.mean()
+        )
 
+    def pick_closest(x):
+        dist = abs(x.dropna() - x["MANAGER"])
+        if dist.count() > 1:
+            dist.drop("MANAGER", inplace=True)
+        return x[dist.idxmin]
 
-def closest(x):
-    x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"]).fillna(0)
-    return x.mark[x.dist == x.dist.min()].iloc[0]
+    def mean_with_default(x):
+        return (
+            x["MANAGER"] if x.drop("MANAGER").count() == 0 else x.drop("MANAGER").mean()
+        )
 
+    marks = pd.concat(
+        [
+            marks.apply(mean_with_default, axis=1),
+            marks.apply(remove_max_min, axis=1),
+            marks.apply(pick_closest, axis=1),
+        ],
+        keys=["mean_vendor", "no_min_max", "closest_vendor"],
+        axis=1,
+    )
 
-def remove_max_min(x):
-    if x.count() >= 4:
-        return (x.sum() - x.max() - x.min()) / (x.count() - 2)
-    else:
-        return x.mean()
+    marks = marks.subtract(df.xs("MANAGER", level="source")["mark"], axis=0)
+    marks = (
+        marks.multiply(df["curr_ntl"].groupby(["date", "identifier"]).mean(), axis=0)
+        / 100
+    )
+    marks["manager"] = 0
+    return marks.groupby(level="date").sum()
 
 
-def diff_by_source(df=None):
-    # diff_by_source: input get_mark_df(), calculate the pricing by source
-    if df is None:
-        df = get_mark_df()
+def diff_by_source(df):
+    # use get_mark_df
     df = df.drop("description", 1)
     df = df.set_index(["source"], append=True).apply(
         lambda x: x.curr_ntl * x.mark / 100, axis=1
@@ -98,10 +92,8 @@ def diff_by_source(df=None):
     return df.groupby(level="date").sum()
 
 
-def alt_navs(df=None):
+def alt_navs(df):
     navs = ops.get_net_navs().sort_index()
-    if df is None:
-        df = get_mark_df()
     df = calc_mark_diff(df)
     end_nav, beg_nav, returns, nav_100 = (
         pd.DataFrame(),
@@ -134,7 +126,7 @@ def back_test(
         df, values="mark", index=["identifier", "date"], columns="source"
    )
 
-    sql_string = "select trade_date, identifier, price from bonds where buysell=%s and asset_class = %s"
+    sql_string = "select trade_date, identifier, price from bond_trades where buysell=%s and asset_class = %s"
     df_trades = pd.read_sql_query(
         sql_string, dawn_engine, parse_dates=["trade_date"], params=[False, "Subprime"]
     )
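The rewritten `calc_mark_diff` reduces the quotes for each (date, identifier) to three vendor aggregates against which the MANAGER mark is compared: a plain vendor mean, a min/max-trimmed mean, and the single quote closest to the manager's. A minimal standalone sketch of those three row-wise helpers on one fabricated set of quotes (source names and numbers are invented for illustration and are not part of the commit; the sketch calls `dist.idxmin()` explicitly):

```python
# Illustrative sketch only -- fabricated quotes, not from the repository.
import pandas as pd

row = pd.Series(
    {"MANAGER": 98.0, "BVAL": 97.5, "IDC": 96.0, "MARKIT": 99.0, "BROKER": 97.0}
)

def mean_with_default(x):
    # vendor average; falls back to the manager mark if no vendor quotes exist
    vendors = x.drop("MANAGER")
    return x["MANAGER"] if vendors.count() == 0 else vendors.mean()

def remove_max_min(x):
    # trimmed mean: drop the highest and lowest quote when there are at least 4
    x = x.dropna()
    if x.count() >= 4:
        return (x.sum() - x.max() - x.min()) / (x.count() - 2)
    return x.mean()

def pick_closest(x):
    # quote closest to the manager mark; MANAGER itself only wins when alone
    dist = (x.dropna() - x["MANAGER"]).abs()
    if dist.count() > 1:
        dist = dist.drop("MANAGER")
    return x[dist.idxmin()]

print(mean_with_default(row))  # 97.375
print(remove_max_min(row))     # (98.0 + 97.5 + 97.0) / 3 = 97.5
print(pick_closest(row))       # 97.5, the BVAL quote nearest to 98.0
```

`calc_mark_diff` then subtracts the MANAGER mark from each aggregate and weights by current notional, so, as the comment in the old code put it, a negative number means the fund marks higher than the vendors.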
diff --git a/python/notebooks/Valuation Backtest.ipynb b/python/notebooks/Valuation Backtest.ipynb
index 5e64c8e0..f7a66681 100644
--- a/python/notebooks/Valuation Backtest.ipynb
+++ b/python/notebooks/Valuation Backtest.ipynb
@@ -6,7 +6,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from utils.db import dbconn, dbengine, serenitas_engine, dawn_engine\n",
+    "from serenitas.utils.db import dbconn, dbengine, serenitas_engine, dawn_engine\n",
     "\n",
     "import datetime\n",
     "import mark_backtest_underpar as mark\n",
@@ -25,9 +25,8 @@
    "source": [
     "date = datetime.date.today() - pd.tseries.offsets.MonthEnd(1)\n",
     "navs = ops.get_net_navs()\n",
-    "subprime = mark.get_mark_df('Subprime')\n",
-    "clo = mark.get_mark_df('CLO')\n",
-    "df = subprime.append(clo)"
+    "df = mark.get_mark_df('Subprime')\n",
+    "df = df.append(mark.get_mark_df('CLO'))"
    ]
   },
   {
@@ -150,14 +149,14 @@
    "source": [
     "#%matplotlib nbagg\n",
     "#Filtered takes out PB\n",
-    "to_plot = ['mark_closest_all', 'mark_filtered_mean']\n",
-    "to_plot1 = ['mark_manager']\n",
+    "to_plot = ['closest_vendor', 'mean_vendor']\n",
+    "to_plot1 = ['manager']\n",
     "plot_df0 = results[1][to_plot]\n",
     "plot_df1 = results[1][to_plot1]\n",
     "\n",
-    "plot_df0 = plot_df0.rename(columns = {'mark_closest_all': 'Third-pary mark closest to LMCG valuation', \\\n",
-    "                                      'mark_filtered_mean': 'Average of all third-party marks'})\n",
-    "plot_df1 = plot_df1.rename(columns = {'mark_manager': 'Marks per fund valuation policy'})\n",
+    "plot_df0 = plot_df0.rename(columns = {'closest_vendor': 'Third-pary mark closest to LMCG valuation', \\\n",
+    "                                      'mean_vendor': 'Average of all third-party marks'})\n",
+    "plot_df1 = plot_df1.rename(columns = {'manager': 'Marks per fund valuation policy'})\n",
     "\n",
     "ax = plot_df0.plot(figsize = [10, 3.5])\n",
     "ax = plot_df1.plot(marker = 'o', ax = ax)\n",
@@ -177,7 +176,7 @@
    "outputs": [],
    "source": [
     "#Fund cumulative returns from the last 12 months\n",
-    "results[1]['mark_manager'][-12:]"
+    "results[1]['manager'][-12:]"
    ]
   },
   {
@@ -190,6 +189,7 @@
     "perf = results[1].groupby(pd.Grouper(freq=\"A\")).last()\n",
     "perf_ann = perf / perf.shift(1) - 1\n",
     "perf_ann[\"2013\"] = perf[\"2013\"] / 100 - 1\n",
+    "perf_ann.to_clipboard()\n",
     "round(perf_ann*100,2)"
    ]
   },
@@ -200,7 +200,7 @@
    "outputs": [],
    "source": [
     "#Return using different methodology - Same calulation as above but monthly \n",
-    "(results[1]/results[1].shift(1) - 1)[-24:][['mark_manager', 'mark_closest_all', 'mark_filtered_mean']]"
+    "(results[1]/results[1].shift(1) - 1)[-24:][['manager', 'closest_vendor', 'mean_vendor']]"
    ]
   },
   {
@@ -222,11 +222,11 @@
    "outputs": [],
    "source": [
     "#% impact historical: positive % means the alternative methodology results in a higher NAV\n",
-    "nav_impact = alt_nav_impact.divide(alt_nav_impact.endbooknav, axis=0)\n",
-    "to_plot = ['mark_closest_all', 'mark_filtered_mean']\n",
-    "nav_impact = nav_impact[to_plot].rename(columns={'mark_closest_all': 'mark to closest', \n",
-    "                                                 'mark_filtered_mean': 'mark to mean'})\n",
-    "ax = nav_impact.plot()\n",
+    "import matplotlib.ticker as mtick\n",
+    "nav_impact = alt_nav_impact.divide(alt_nav_impact.endbooknav/100, axis=0)\n",
+    "ax = nav_impact[['closest_vendor', 'mean_vendor']].plot(figsize = [5, 7])\n",
+    "lgd = ax.legend(loc='upper center', bbox_to_anchor=(0.5, -.1), shadow=True, ncol=3)\n",
+    "ax.yaxis.set_major_formatter(mtick.PercentFormatter())\n",
     "ax.figure.savefig(\"/home/serenitas/edwin/PythonGraphs/Valuation_3.png\", bbox_extra_artists=(lgd,), bbox_inches='tight')"
    ]
   },
@@ -279,7 +279,7 @@
    "all_cf = pd.read_sql_query(\"SELECT * FROM cashflow_history\", dawn_engine,\n",
    "                           parse_dates=['date'],\n",
    "                           index_col=['date']).sort_index()\n",
-   "all_trades = pd.read_sql_query(\"SELECT * FROM bonds where fund = %s order by trade_date desc\", dawn_engine,\n",
+   "all_trades = pd.read_sql_query(\"SELECT * FROM bond_trades where fund = %s order by trade_date desc\", dawn_engine,\n",
    "                           parse_dates={'lastupdate':{'utc':True}, 'trade_date': {}, 'settle_date':{}},\n",
    "                           index_col = 'trade_date', params=[fund])\n",
    "all_trades['curr_notional'] = all_trades['principal_payment']/all_trades['price'] *100\n",
@@ -350,7 +350,29 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "#Look at accuracy of CLO marks\n",
+    "sql_string = \"select date, a.identifier, mark, source from external_marks a \" \\\n",
+    "             \"left join bonds b on a.identifier = b.identifier where asset_class = 'CLO'\"\n",
+    "clo_marks = pd.read_sql_query(sql_string, dawn_engine,\n",
+    "                              parse_dates='date',\n",
+    "                              index_col = 'date')\n",
+    "clo_marks = clo_marks.reset_index().merge(clo_marks.groupby(['date', 'identifier']).mean(), on=['identifier', 'date'])\n",
+    "\n",
+    "#filter out crap marks and change labels for brokers, and regroup them \n",
+    "clo_marks = clo_marks[clo_marks['source'] != 'VENDOR&ORBROKER_AVG']\n",
+    "clo_marks.loc[clo_marks['source'].str.contains('BROKER'), 'source'] = 'BROKER'\n",
+    "clo_marks = clo_marks.groupby(['date', 'identifier', 'source']).mean()\n",
+    "\n",
+    "clo_marks = clo_marks.loc['2019-01-01':,:,:]\n",
+    "clo_marks = clo_marks.rename(columns={'mark_y': 'avg_cusip_mark'})\n",
+    "clo_marks['diff'] = (clo_marks['mark_x'] - clo_marks['avg_cusip_mark'])/clo_marks['mark_x']\n",
+    "\n",
+    "g = clo_marks.groupby('source')['diff']\n",
+    "print('sample size', g.count())\n",
+    "print('mean diff', g.mean())\n",
+    "print('std_dev of diff from mean', g.std()/np.sqrt(g.count()))\n"
+   ]
   }
  ],
  "metadata": {
@@ -369,7 +391,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.1-final"
   }
 },
 "nbformat": 4,
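The new CLO cell at the end of the notebook boils down to one idea: compute each pricing source's relative deviation from the per-cusip average mark, then report the mean and standard error of that deviation by source. A self-contained sketch of that calculation with fabricated data (identifiers, sources, and numbers are invented; the real cell queries the external_marks table):

```python
# Illustrative sketch only -- fabricated CLO quotes, not from the database.
import numpy as np
import pandas as pd

clo_marks = pd.DataFrame(
    {
        "date": pd.to_datetime(["2019-03-29"] * 3 + ["2019-04-30"] * 3),
        "identifier": ["XYZ123"] * 6,
        "source": ["MANAGER", "BVAL", "BROKER_A", "MANAGER", "BVAL", "BROKER_B"],
        "mark": [97.0, 96.5, 95.0, 98.0, 97.8, 96.0],
    }
)

# average mark per cusip and date, merged back onto every individual quote
avg = (
    clo_marks.groupby(["date", "identifier"], as_index=False)["mark"]
    .mean()
    .rename(columns={"mark": "avg_cusip_mark"})
)
clo_marks = clo_marks.merge(avg, on=["date", "identifier"])

# collapse all broker quotes under one BROKER label, as the notebook cell does
clo_marks.loc[clo_marks["source"].str.contains("BROKER"), "source"] = "BROKER"

# relative deviation of each quote from its cusip average
clo_marks["diff"] = (clo_marks["mark"] - clo_marks["avg_cusip_mark"]) / clo_marks["mark"]

g = clo_marks.groupby("source")["diff"]
print("sample size", g.count())
print("mean diff", g.mean())
print("std err of mean diff", g.std() / np.sqrt(g.count()))
```

With these toy rows the samples are tiny; run against the real marks data, the same grouping shows which sources systematically sit above or below the consensus mark.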
