aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/mark_backtest_backfill.py23
-rw-r--r--python/mark_backtest_underpar.py102
-rw-r--r--python/notebooks/Valuation Backtest.ipynb60
3 files changed, 108 insertions, 77 deletions
diff --git a/python/mark_backtest_backfill.py b/python/mark_backtest_backfill.py
index 82187f9c..5a35edf1 100644
--- a/python/mark_backtest_backfill.py
+++ b/python/mark_backtest_backfill.py
@@ -3,9 +3,9 @@ import os
import re
import psycopg2
import datetime
-import bbg_helpers
+import serenitas.analytics.bbg_helpers
-from utils.db import dbconn, serenitas_engine, dawn_engine, DataError
+from serenitas.utils.db import dbconn, serenitas_engine, dawn_engine, DataError
from pickle import dumps
from sqlalchemy import create_engine
from itertools import chain
@@ -28,7 +28,9 @@ def runSingleFill(f):
+ MonthEnd()
)
# change /usr/lib/python3.6/site-packages/xlrd/xlsx.py line 609 to check for ":" in ref as well. Otherwise single merged cells bombs
- marks = pd.read_excel(f, sheet_name, skiprows=11, usecols=range_name)
+ marks = pd.read_excel(
+ f, sheet_name, skiprows=11, usecols=range_name, engine="openpyxl"
+ )
marks = marks.rename(columns=lambda x: x.replace(".1", ""))
df = pd.DataFrame()
@@ -123,6 +125,21 @@ def get_globs():
settings = {
+ "ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20201201.20201231.Draft.xlsx": (
+ "LA:MD",
+ "Securities Valuation Details",
+ "Y",
+ ),
+ "ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20201101.20201130.Draft.xlsx": (
+ "JA:JX",
+ "Securities Valuation Details",
+ "Y",
+ ),
+ "ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20201001.20201031.Draft.xlsx": (
+ "JA:JX",
+ "Securities Valuation Details",
+ "Y",
+ ),
"ReviewedPack.jmircovi.SERENITAS.SERCGMAST.20200901.20200930.Draft.xlsx": (
"JA:JX",
"Securities Valuation Details",
diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py
index 821d86a7..dcf2ba0d 100644
--- a/python/mark_backtest_underpar.py
+++ b/python/mark_backtest_underpar.py
@@ -8,7 +8,7 @@ import datetime
import globeop_reports as ops
from statsmodels.formula.api import gls
-from utils.db import serenitas_engine, dawn_engine, DataError
+from serenitas.utils.db import serenitas_engine, dawn_engine, DataError
def get_mark_df(asset_class="Subprime"):
@@ -18,7 +18,7 @@ def get_mark_df(asset_class="Subprime"):
"select * from external_marks_mapped where mark < 3000", dawn_engine
)
df_trades = pd.DataFrame()
- for date in df_external_marks.date.unique():
+ for date in df_external_marks["date"].unique():
df_temp = pd.read_sql_query(
"select description, identifier, "
"usd_market_value/price*100 as curr_ntl "
@@ -33,62 +33,56 @@ def get_mark_df(asset_class="Subprime"):
return df.set_index(["date", "identifier"])
-def calc_mark_diff(df=None, sources=None):
-
- if sources is None:
- sources = [
- "PRICESERVE",
- "PRICINGDIRECT",
- "BVAL",
- "MARKIT",
- "BROKER",
- "REUTERS",
- "S&P",
- "IDC",
- ]
- if df is None:
- df = get_mark_df()
- # All Sources (including manager...?!) - average, manager mark only, median, closest
- g = df.groupby(level=["date", "identifier"])
- diff = g.mean()
- diff = diff.join(df[df.source == "MANAGER"]["mark"], rsuffix="_manager")
- diff = diff.join(g.median()["mark"], rsuffix="_median_all")
- temp = g.apply(closest)
- temp = temp.rename("mark_closest_all")
- diff = diff.join(temp)
-
- # Filtered Sources - mean, median, remove max min
- df_filtered = df[df.source.isin(sources)]
- g1 = df_filtered.groupby(level=["date", "identifier"])
- diff = diff.join(g1.mean()["mark"], rsuffix="_filtered_mean")
- diff = diff.join(g1.median()["mark"], rsuffix="_filtered_median")
- diff = diff.join(g1.mark.apply(remove_max_min), rsuffix="_filtered_no_max_min")
-
- # calculate difference: negative means Serenitas marks higher
- diff = diff.multiply(diff.curr_ntl / 100, axis="index")
- del diff["curr_ntl"]
- diff = diff.rename(columns={"mark": "mark_mean_all"})
- diff = diff.apply(lambda x: (x - x.mark_manager), axis=1)
+def calc_mark_diff(df, source_remove=["PB"]):
+ # use get_mark_df
+ df = df[~df.source.isin(source_remove)]
+ df = (
+ df.set_index("source", append=True)
+ .groupby(level=["date", "identifier", "source"])
+ .mean()
+ )
+ marks = df["mark"].unstack(-1).dropna(subset=["MANAGER"])
- return diff.groupby(level="date").sum()
+ def remove_max_min(x):
+ x = x.dropna()
+ return (
+ (x.sum() - x.max() - x.min()) / (x.count() - 2)
+ if x.count() >= 4
+ else x.mean()
+ )
+ def pick_closest(x):
+ dist = abs(x.dropna() - x["MANAGER"])
+ if dist.count() > 1:
+ dist.drop("MANAGER", inplace=True)
+ return x[dist.idxmin]
-def closest(x):
- x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"]).fillna(0)
- return x.mark[x.dist == x.dist.min()].iloc[0]
+ def mean_with_default(x):
+ return (
+ x["MANAGER"] if x.drop("MANAGER").count() == 0 else x.drop("MANAGER").mean()
+ )
+ marks = pd.concat(
+ [
+ marks.apply(mean_with_default, axis=1),
+ marks.apply(remove_max_min, axis=1),
+ marks.apply(pick_closest, axis=1),
+ ],
+ keys=["mean_vendor", "no_min_max", "closest_vendor"],
+ axis=1,
+ )
-def remove_max_min(x):
- if x.count() >= 4:
- return (x.sum() - x.max() - x.min()) / (x.count() - 2)
- else:
- return x.mean()
+ marks = marks.subtract(df.xs("MANAGER", level="source")["mark"], axis=0)
+ marks = (
+ marks.multiply(df["curr_ntl"].groupby(["date", "identifier"]).mean(), axis=0)
+ / 100
+ )
+ marks["manager"] = 0
+ return marks.groupby(level="date").sum()
-def diff_by_source(df=None):
- # diff_by_source: input get_mark_df(), calculate the pricing by source
- if df is None:
- df = get_mark_df()
+def diff_by_source(df):
+ # use get_mark_df
df = df.drop("description", 1)
df = df.set_index(["source"], append=True).apply(
lambda x: x.curr_ntl * x.mark / 100, axis=1
@@ -98,10 +92,8 @@ def diff_by_source(df=None):
return df.groupby(level="date").sum()
-def alt_navs(df=None):
+def alt_navs(df):
navs = ops.get_net_navs().sort_index()
- if df is None:
- df = get_mark_df()
df = calc_mark_diff(df)
end_nav, beg_nav, returns, nav_100 = (
pd.DataFrame(),
@@ -134,7 +126,7 @@ def back_test(
df, values="mark", index=["identifier", "date"], columns="source"
)
- sql_string = "select trade_date, identifier, price from bonds where buysell=%s and asset_class = %s"
+ sql_string = "select trade_date, identifier, price from bond_trades where buysell=%s and asset_class = %s"
df_trades = pd.read_sql_query(
sql_string, dawn_engine, parse_dates=["trade_date"], params=[False, "Subprime"]
)
diff --git a/python/notebooks/Valuation Backtest.ipynb b/python/notebooks/Valuation Backtest.ipynb
index 5e64c8e0..f7a66681 100644
--- a/python/notebooks/Valuation Backtest.ipynb
+++ b/python/notebooks/Valuation Backtest.ipynb
@@ -6,7 +6,7 @@
"metadata": {},
"outputs": [],
"source": [
- "from utils.db import dbconn, dbengine, serenitas_engine, dawn_engine\n",
+ "from serenitas.utils.db import dbconn, dbengine, serenitas_engine, dawn_engine\n",
"\n",
"import datetime\n",
"import mark_backtest_underpar as mark\n",
@@ -25,9 +25,8 @@
"source": [
"date = datetime.date.today() - pd.tseries.offsets.MonthEnd(1)\n",
"navs = ops.get_net_navs()\n",
- "subprime = mark.get_mark_df('Subprime')\n",
- "clo = mark.get_mark_df('CLO')\n",
- "df = subprime.append(clo)"
+ "df = mark.get_mark_df('Subprime')\n",
+ "df = df.append(mark.get_mark_df('CLO'))"
]
},
{
@@ -150,14 +149,14 @@
"source": [
"#%matplotlib nbagg\n",
"#Filtered takes out PB\n",
- "to_plot = ['mark_closest_all', 'mark_filtered_mean']\n",
- "to_plot1 = ['mark_manager']\n",
+ "to_plot = ['closest_vendor', 'mean_vendor']\n",
+ "to_plot1 = ['manager']\n",
"plot_df0 = results[1][to_plot]\n",
"plot_df1 = results[1][to_plot1]\n",
"\n",
- "plot_df0 = plot_df0.rename(columns = {'mark_closest_all': 'Third-pary mark closest to LMCG valuation', \\\n",
- " 'mark_filtered_mean': 'Average of all third-party marks'})\n",
- "plot_df1 = plot_df1.rename(columns = {'mark_manager': 'Marks per fund valuation policy'})\n",
+    "plot_df0 = plot_df0.rename(columns = {'closest_vendor': 'Third-party mark closest to LMCG valuation', \\\n",
+ " 'mean_vendor': 'Average of all third-party marks'})\n",
+ "plot_df1 = plot_df1.rename(columns = {'manager': 'Marks per fund valuation policy'})\n",
"\n",
"ax = plot_df0.plot(figsize = [10, 3.5])\n",
"ax = plot_df1.plot(marker = 'o', ax = ax)\n",
@@ -177,7 +176,7 @@
"outputs": [],
"source": [
"#Fund cumulative returns from the last 12 months\n",
- "results[1]['mark_manager'][-12:]"
+ "results[1]['manager'][-12:]"
]
},
{
@@ -190,6 +189,7 @@
"perf = results[1].groupby(pd.Grouper(freq=\"A\")).last()\n",
"perf_ann = perf / perf.shift(1) - 1\n",
"perf_ann[\"2013\"] = perf[\"2013\"] / 100 - 1\n",
+ "perf_ann.to_clipboard()\n",
"round(perf_ann*100,2)"
]
},
@@ -200,7 +200,7 @@
"outputs": [],
"source": [
"#Return using different methodology - Same calulation as above but monthly \n",
- "(results[1]/results[1].shift(1) - 1)[-24:][['mark_manager', 'mark_closest_all', 'mark_filtered_mean']]"
+ "(results[1]/results[1].shift(1) - 1)[-24:][['manager', 'closest_vendor', 'mean_vendor']]"
]
},
{
@@ -222,11 +222,11 @@
"outputs": [],
"source": [
"#% impact historical: positive % means the alternative methodology results in a higher NAV\n",
- "nav_impact = alt_nav_impact.divide(alt_nav_impact.endbooknav, axis=0)\n",
- "to_plot = ['mark_closest_all', 'mark_filtered_mean']\n",
- "nav_impact = nav_impact[to_plot].rename(columns={'mark_closest_all': 'mark to closest', \n",
- " 'mark_filtered_mean': 'mark to mean'})\n",
- "ax = nav_impact.plot()\n",
+ "import matplotlib.ticker as mtick\n",
+ "nav_impact = alt_nav_impact.divide(alt_nav_impact.endbooknav/100, axis=0)\n",
+ "ax = nav_impact[['closest_vendor', 'mean_vendor']].plot(figsize = [5, 7])\n",
+ "lgd = ax.legend(loc='upper center', bbox_to_anchor=(0.5, -.1), shadow=True, ncol=3)\n",
+ "ax.yaxis.set_major_formatter(mtick.PercentFormatter())\n",
"ax.figure.savefig(\"/home/serenitas/edwin/PythonGraphs/Valuation_3.png\", bbox_extra_artists=(lgd,), bbox_inches='tight')"
]
},
@@ -279,7 +279,7 @@
"all_cf = pd.read_sql_query(\"SELECT * FROM cashflow_history\", dawn_engine,\n",
" parse_dates=['date'],\n",
" index_col=['date']).sort_index()\n",
- "all_trades = pd.read_sql_query(\"SELECT * FROM bonds where fund = %s order by trade_date desc\", dawn_engine,\n",
+ "all_trades = pd.read_sql_query(\"SELECT * FROM bond_trades where fund = %s order by trade_date desc\", dawn_engine,\n",
" parse_dates={'lastupdate':{'utc':True}, 'trade_date': {}, 'settle_date':{}},\n",
" index_col = 'trade_date', params=[fund])\n",
"all_trades['curr_notional'] = all_trades['principal_payment']/all_trades['price'] *100\n",
@@ -350,7 +350,29 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "#Look at accuracy of CLO marks\n",
+ "sql_string = \"select date, a.identifier, mark, source from external_marks a \" \\\n",
+ " \"left join bonds b on a.identifier = b.identifier where asset_class = 'CLO'\"\n",
+ "clo_marks = pd.read_sql_query(sql_string, dawn_engine,\n",
+ " parse_dates='date',\n",
+ " index_col = 'date')\n",
+ "clo_marks = clo_marks.reset_index().merge(clo_marks.groupby(['date', 'identifier']).mean(), on=['identifier', 'date'])\n",
+ "\n",
+    "#filter out low-quality marks, relabel broker sources under a single 'BROKER' label, and regroup \n",
+ "clo_marks = clo_marks[clo_marks['source'] != 'VENDOR&ORBROKER_AVG']\n",
+ "clo_marks.loc[clo_marks['source'].str.contains('BROKER'), 'source'] = 'BROKER'\n",
+ "clo_marks = clo_marks.groupby(['date', 'identifier', 'source']).mean()\n",
+ "\n",
+ "clo_marks = clo_marks.loc['2019-01-01':,:,:]\n",
+ "clo_marks = clo_marks.rename(columns={'mark_y': 'avg_cusip_mark'})\n",
+ "clo_marks['diff'] = (clo_marks['mark_x'] - clo_marks['avg_cusip_mark'])/clo_marks['mark_x']\n",
+ "\n",
+ "g = clo_marks.groupby('source')['diff']\n",
+ "print('sample size', g.count())\n",
+ "print('mean diff', g.mean())\n",
+    "print('std_err of mean diff', g.std()/np.sqrt(g.count()))\n"
+ ]
}
],
"metadata": {
@@ -369,7 +391,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.9.1-final"
}
},
"nbformat": 4,