aboutsummaryrefslogtreecommitdiffstats
path: root/python/mark_backtest_underpar.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/mark_backtest_underpar.py')
-rw-r--r--python/mark_backtest_underpar.py87
1 file changed, 30 insertions, 57 deletions
diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py
index 401ef81c..821d86a7 100644
--- a/python/mark_backtest_underpar.py
+++ b/python/mark_backtest_underpar.py
@@ -5,13 +5,11 @@ import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sb
import datetime
+import globeop_reports as ops
from statsmodels.formula.api import gls
-
from utils.db import serenitas_engine, dawn_engine, DataError
-import globeop_reports as ops
-
def get_mark_df(asset_class="Subprime"):
# It used to be >1000 but as of 12/17/2018 changed it to 3000
@@ -22,7 +20,10 @@ def get_mark_df(asset_class="Subprime"):
df_trades = pd.DataFrame()
for date in df_external_marks.date.unique():
df_temp = pd.read_sql_query(
- "select description, identifier, usd_market_value/price*100 as curr_ntl from risk_positions(%s, %s) where price >0 and length(identifier) = 9 ",
+ "select description, identifier, "
+ "usd_market_value/price*100 as curr_ntl "
+ "from risk_positions(%s, %s) "
+ "where price >0 and length(identifier) = 9 ",
dawn_engine,
params=[date, asset_class],
)
@@ -32,20 +33,21 @@ def get_mark_df(asset_class="Subprime"):
return df.set_index(["date", "identifier"])
-def calc_mark_diff(
- df,
- sources=[
- "PRICESERVE",
- "PRICINGDIRECT",
- "BVAL",
- "MARKIT",
- "BROKER",
- "REUTERS",
- "S&P",
- "IDC",
- ],
-):
+def calc_mark_diff(df=None, sources=None):
+ if sources is None:
+ sources = [
+ "PRICESERVE",
+ "PRICINGDIRECT",
+ "BVAL",
+ "MARKIT",
+ "BROKER",
+ "REUTERS",
+ "S&P",
+ "IDC",
+ ]
+ if df is None:
+ df = get_mark_df()
# All Sources (including manager...?!) - average, manager mark only, median, closest
g = df.groupby(level=["date", "identifier"])
diff = g.mean()
@@ -72,11 +74,8 @@ def calc_mark_diff(
def closest(x):
- if x.mark.count() > 1:
- x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"])
- return x.mark[x.dist == x.dist[x.dist > 0].min()].iloc[0]
- else:
- return x.mark[0]
+ x["dist"] = abs(x.mark - x.mark[x.source == "MANAGER"]).fillna(0)
+ return x.mark[x.dist == x.dist.min()].iloc[0]
def remove_max_min(x):
@@ -86,8 +85,10 @@ def remove_max_min(x):
return x.mean()
-def diff_by_source(df):
+def diff_by_source(df=None):
# diff_by_source: input get_mark_df(), calculate the pricing by source
+ if df is None:
+ df = get_mark_df()
df = df.drop("description", 1)
df = df.set_index(["source"], append=True).apply(
lambda x: x.curr_ntl * x.mark / 100, axis=1
@@ -97,24 +98,11 @@ def diff_by_source(df):
return df.groupby(level="date").sum()
-def diff_by_source_percentage(df):
- df = diff_by_source(df)
- df = df.join(ops.get_net_navs()["endbooknav"])
- df = df.apply(lambda x: (x / x.endbooknav), axis=1)
- del df["endbooknav"]
- return df
-
-
-def count_sources(df):
- # input get_mark_df(), plot count of each source
- g2 = df.set_index("source", append=True).groupby(level=["date", "source"])
- # there are a good amount of Bloomberg duplicates, not a big deal but should clean them up
- g2["mark"].count().unstack(-1).plot()
-
-
-def alt_navs():
- navs = ops.get_net_navs()
- df = calc_mark_diff(get_mark_df())
+def alt_navs(df=None):
+ navs = ops.get_net_navs().sort_index()
+ if df is None:
+ df = get_mark_df()
+ df = calc_mark_diff(df)
end_nav, beg_nav, returns, nav_100 = (
pd.DataFrame(),
pd.DataFrame(),
@@ -125,28 +113,13 @@ def alt_navs():
end_nav[col] = df[col] + navs.endbooknav
beg_nav[col] = end_nav[col].shift(1) + navs.net_flow.shift(1)
beg_nav[col].iloc[0] = 12500000
- returns[col] = (end_nav[col] - navs.incentive) / beg_nav[col] - 1
+ returns[col] = (end_nav[col] - navs.incentive_fee) / beg_nav[col] - 1
for i, row in returns.dropna().reset_index().iterrows():
nav_100.iloc[i] = 100 if i == 0 else nav_100.iloc[i - 1]
nav_100.iloc[i] = nav_100.iloc[i] * (1 + returns.iloc[i])
return returns, nav_100
-def annual_performance(nav_100):
- perf = nav_100.groupby(pd.Grouper(freq="A")).last()
- perf_ann = perf / perf.shift(1) - 1
- perf_ann["2013"] = perf["2013"] / 100 - 1
- return perf_ann
-
-
-def alt_nav_impact():
- navs = ops.get_net_navs()
- df = calc_mark_diff(get_mark_df())
- df = df.join(navs.endbooknav)
- return df
- # return df.iloc[-1]/df.iloc[-1]['endbooknav']
-
-
def back_test(
begindate="2013-01-01", enddate=datetime.date.today(), sell_price_threshold=200
):