aboutsummaryrefslogtreecommitdiffstats
path: root/python/mark_backtest_underpar.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/mark_backtest_underpar.py')
-rw-r--r--python/mark_backtest_underpar.py102
1 files changed, 47 insertions, 55 deletions
diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py
index 821d86a7..dcf2ba0d 100644
--- a/python/mark_backtest_underpar.py
+++ b/python/mark_backtest_underpar.py
@@ -8,7 +8,7 @@ import datetime
import globeop_reports as ops
from statsmodels.formula.api import gls
-from utils.db import serenitas_engine, dawn_engine, DataError
+from serenitas.utils.db import serenitas_engine, dawn_engine, DataError
def get_mark_df(asset_class="Subprime"):
@@ -18,7 +18,7 @@ def get_mark_df(asset_class="Subprime"):
"select * from external_marks_mapped where mark < 3000", dawn_engine
)
df_trades = pd.DataFrame()
- for date in df_external_marks.date.unique():
+ for date in df_external_marks["date"].unique():
df_temp = pd.read_sql_query(
"select description, identifier, "
"usd_market_value/price*100 as curr_ntl "
@@ -33,62 +33,56 @@ def get_mark_df(asset_class="Subprime"):
return df.set_index(["date", "identifier"])
-def calc_mark_diff(df=None, sources=None):
-
- if sources is None:
- sources = [
- "PRICESERVE",
- "PRICINGDIRECT",
- "BVAL",
- "MARKIT",
- "BROKER",
- "REUTERS",
- "S&P",
- "IDC",
- ]
- if df is None:
- df = get_mark_df()
- # All Sources (including manager...?!) - average, manager mark only, median, closest
- g = df.groupby(level=["date", "identifier"])
- diff = g.mean()
- diff = diff.join(df[df.source == "MANAGER"]["mark"], rsuffix="_manager")
- diff = diff.join(g.median()["mark"], rsuffix="_median_all")
- temp = g.apply(closest)
- temp = temp.rename("mark_closest_all")
- diff = diff.join(temp)
-
- # Filtered Sources - mean, median, remove max min
- df_filtered = df[df.source.isin(sources)]
- g1 = df_filtered.groupby(level=["date", "identifier"])
- diff = diff.join(g1.mean()["mark"], rsuffix="_filtered_mean")
- diff = diff.join(g1.median()["mark"], rsuffix="_filtered_median")
- diff = diff.join(g1.mark.apply(remove_max_min), rsuffix="_filtered_no_max_min")
-
- # calculate difference: negative means Serenitas marks higher
- diff = diff.multiply(diff.curr_ntl / 100, axis="index")
- del diff["curr_ntl"]
- diff = diff.rename(columns={"mark": "mark_mean_all"})
- diff = diff.apply(lambda x: (x - x.mark_manager), axis=1)
+def calc_mark_diff(df, source_remove=["PB"]):
+ # use get_mark_df
+ df = df[~df.source.isin(source_remove)]
+ df = (
+ df.set_index("source", append=True)
+ .groupby(level=["date", "identifier", "source"])
+ .mean()
+ )
+ marks = df["mark"].unstack(-1).dropna(subset=["MANAGER"])
- return diff.groupby(level="date").sum()
+ def remove_max_min(x):
+ x = x.dropna()
+ return (
+ (x.sum() - x.max() - x.min()) / (x.count() - 2)
+ if x.count() >= 4
+ else x.mean()
+ )
+ def pick_closest(x):
+ dist = abs(x.dropna() - x["MANAGER"])
+ if dist.count() > 1:
+ dist.drop("MANAGER", inplace=True)
+ return x[dist.idxmin]
-def closest(x):
- x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"]).fillna(0)
- return x.mark[x.dist == x.dist.min()].iloc[0]
+ def mean_with_default(x):
+ return (
+ x["MANAGER"] if x.drop("MANAGER").count() == 0 else x.drop("MANAGER").mean()
+ )
+ marks = pd.concat(
+ [
+ marks.apply(mean_with_default, axis=1),
+ marks.apply(remove_max_min, axis=1),
+ marks.apply(pick_closest, axis=1),
+ ],
+ keys=["mean_vendor", "no_min_max", "closest_vendor"],
+ axis=1,
+ )
-def remove_max_min(x):
- if x.count() >= 4:
- return (x.sum() - x.max() - x.min()) / (x.count() - 2)
- else:
- return x.mean()
+ marks = marks.subtract(df.xs("MANAGER", level="source")["mark"], axis=0)
+ marks = (
+ marks.multiply(df["curr_ntl"].groupby(["date", "identifier"]).mean(), axis=0)
+ / 100
+ )
+ marks["manager"] = 0
+ return marks.groupby(level="date").sum()
-def diff_by_source(df=None):
- # diff_by_source: input get_mark_df(), calculate the pricing by source
- if df is None:
- df = get_mark_df()
+def diff_by_source(df):
+ # use get_mark_df
df = df.drop("description", 1)
df = df.set_index(["source"], append=True).apply(
lambda x: x.curr_ntl * x.mark / 100, axis=1
@@ -98,10 +92,8 @@ def diff_by_source(df=None):
return df.groupby(level="date").sum()
-def alt_navs(df=None):
+def alt_navs(df):
navs = ops.get_net_navs().sort_index()
- if df is None:
- df = get_mark_df()
df = calc_mark_diff(df)
end_nav, beg_nav, returns, nav_100 = (
pd.DataFrame(),
@@ -134,7 +126,7 @@ def back_test(
df, values="mark", index=["identifier", "date"], columns="source"
)
- sql_string = "select trade_date, identifier, price from bonds where buysell=%s and asset_class = %s"
+ sql_string = "select trade_date, identifier, price from bond_trades where buysell=%s and asset_class = %s"
df_trades = pd.read_sql_query(
sql_string, dawn_engine, parse_dates=["trade_date"], params=[False, "Subprime"]
)