diff options
Diffstat (limited to 'python/mark_backtest_underpar.py')
| -rw-r--r-- | python/mark_backtest_underpar.py | 102 |
1 files changed, 47 insertions, 55 deletions
diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py index 821d86a7..dcf2ba0d 100644 --- a/python/mark_backtest_underpar.py +++ b/python/mark_backtest_underpar.py @@ -8,7 +8,7 @@ import datetime import globeop_reports as ops from statsmodels.formula.api import gls -from utils.db import serenitas_engine, dawn_engine, DataError +from serenitas.utils.db import serenitas_engine, dawn_engine, DataError def get_mark_df(asset_class="Subprime"): @@ -18,7 +18,7 @@ def get_mark_df(asset_class="Subprime"): "select * from external_marks_mapped where mark < 3000", dawn_engine ) df_trades = pd.DataFrame() - for date in df_external_marks.date.unique(): + for date in df_external_marks["date"].unique(): df_temp = pd.read_sql_query( "select description, identifier, " "usd_market_value/price*100 as curr_ntl " @@ -33,62 +33,56 @@ def get_mark_df(asset_class="Subprime"): return df.set_index(["date", "identifier"]) -def calc_mark_diff(df=None, sources=None): - - if sources is None: - sources = [ - "PRICESERVE", - "PRICINGDIRECT", - "BVAL", - "MARKIT", - "BROKER", - "REUTERS", - "S&P", - "IDC", - ] - if df is None: - df = get_mark_df() - # All Sources (including manager...?!) - average, manager mark only, median, closest - g = df.groupby(level=["date", "identifier"]) - diff = g.mean() - diff = diff.join(df[df.source == "MANAGER"]["mark"], rsuffix="_manager") - diff = diff.join(g.median()["mark"], rsuffix="_median_all") - temp = g.apply(closest) - temp = temp.rename("mark_closest_all") - diff = diff.join(temp) - - # Filtered Sources - mean, median, remove max min - df_filtered = df[df.source.isin(sources)] - g1 = df_filtered.groupby(level=["date", "identifier"]) - diff = diff.join(g1.mean()["mark"], rsuffix="_filtered_mean") - diff = diff.join(g1.median()["mark"], rsuffix="_filtered_median") - diff = diff.join(g1.mark.apply(remove_max_min), rsuffix="_filtered_no_max_min") - - # calculate difference: negative means Serenitas marks higher - diff = diff.multiply(diff.curr_ntl / 100, axis="index") - del diff["curr_ntl"] - diff = diff.rename(columns={"mark": "mark_mean_all"}) - diff = diff.apply(lambda x: (x - x.mark_manager), axis=1) +def calc_mark_diff(df, source_remove=["PB"]): + # use get_mark_df + df = df[~df.source.isin(source_remove)] + df = ( + df.set_index("source", append=True) + .groupby(level=["date", "identifier", "source"]) + .mean() + ) + marks = df["mark"].unstack(-1).dropna(subset=["MANAGER"]) - return diff.groupby(level="date").sum() + def remove_max_min(x): + x = x.dropna() + return ( + (x.sum() - x.max() - x.min()) / (x.count() - 2) + if x.count() >= 4 + else x.mean() + ) + def pick_closest(x): + dist = abs(x.dropna() - x["MANAGER"]) + if dist.count() > 1: + dist.drop("MANAGER", inplace=True) + return x[dist.idxmin] -def closest(x): - x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"]).fillna(0) - return x.mark[x.dist == x.dist.min()].iloc[0] + def mean_with_default(x): + return ( + x["MANAGER"] if x.drop("MANAGER").count() == 0 else x.drop("MANAGER").mean() + ) + marks = pd.concat( + [ + marks.apply(mean_with_default, axis=1), + marks.apply(remove_max_min, axis=1), + marks.apply(pick_closest, axis=1), + ], + keys=["mean_vendor", "no_min_max", "closest_vendor"], + axis=1, + ) -def remove_max_min(x): - if x.count() >= 4: - return (x.sum() - x.max() - x.min()) / (x.count() - 2) - else: - return x.mean() + marks = marks.subtract(df.xs("MANAGER", level="source")["mark"], axis=0) + marks = ( + marks.multiply(df["curr_ntl"].groupby(["date", "identifier"]).mean(), axis=0) + / 100 + ) + marks["manager"] = 0 + return marks.groupby(level="date").sum() -def diff_by_source(df=None): - # diff_by_source: input get_mark_df(), calculate the pricing by source - if df is None: - df = get_mark_df() +def diff_by_source(df): + # use get_mark_df df = df.drop("description", 1) df = df.set_index(["source"], append=True).apply( lambda x: x.curr_ntl * x.mark / 100, axis=1 @@ -98,10 +92,8 @@ def diff_by_source(df=None): return df.groupby(level="date").sum() -def alt_navs(df=None): +def alt_navs(df): navs = ops.get_net_navs().sort_index() - if df is None: - df = get_mark_df() df = calc_mark_diff(df) end_nav, beg_nav, returns, nav_100 = ( pd.DataFrame(), @@ -134,7 +126,7 @@ def back_test( df, values="mark", index=["identifier", "date"], columns="source" ) - sql_string = "select trade_date, identifier, price from bonds where buysell=%s and asset_class = %s" + sql_string = "select trade_date, identifier, price from bond_trades where buysell=%s and asset_class = %s" df_trades = pd.read_sql_query( sql_string, dawn_engine, parse_dates=["trade_date"], params=[False, "Subprime"] ) |
