aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/mark_backtest.py78
1 files changed, 78 insertions, 0 deletions
diff --git a/python/mark_backtest.py b/python/mark_backtest.py
new file mode 100644
index 00000000..970e8b6f
--- /dev/null
+++ b/python/mark_backtest.py
@@ -0,0 +1,78 @@
+import pandas as pd
+from db import dbengine
+import matplotlib
+import numpy as np
+import matplotlib.pyplot as plt
+import statsmodels.api as sm
+from statsmodels.formula.api import gls
+
+df = (pd.read_sql_table('external_marks_mapped',dbengine('dawndb'),
+ parse_dates=['date']).
+ sort_values('date'))
+df1 = pd.read_sql_query("select trade_date, cusip as identifier, principal_payment, price from bonds where buysell = 'f'"
+ ,dbengine('dawndb'),
+ parse_dates=[
+ 'trade_date'
+ ])
+df1 = df1.sort_values('trade_date')
+d = {}
+for source, g in df.groupby('source'):
+ d[source] = pd.merge_asof(df1, g, left_on='trade_date', right_on= 'date', by='identifier')
+df2 = pd.concat(d, join='inner')
+df3 = df2.dropna(axis=0, subset=['mark'])
+df3 = df3.reset_index(level=1, drop=True)
+df4 = df3.set_index(['trade_date','identifier', 'source', 'principal_payment'])
+df4 = df4.groupby(level=['trade_date', 'identifier','source', 'principal_payment']).mean()
+df4 = df4.set_index('price', append=True)
+df5 = df4.unstack(-3)
+df5.columns = df5.columns.droplevel()
+#df6 = df5.reset_index(col_level=1, drop=True)
+df6 = df5.sortlevel(0, ascending = False)
+df6 = df6.reset_index()
+
+r2 = {}
+stderr = {}
+parameters = {}
+#for g in df5['mark']:
+for col in df6.columns[4:]:
+ regmodel = gls(formula= "df6['price'] ~df6[col]", data = df6).fit()
+ r2[col] = regmodel.rsquared
+ stderr[col] = regmodel.bse
+ parameters[col] = regmodel.params
+
+r2 = pd.Series(r2, name= 'R2')
+r2.index.name = 'Source'
+r2.reset_index
+r2 = pd.DataFrame(r2)
+stderr = pd.DataFrame(stderr)
+parameters = pd.DataFrame(parameters)
+
+df7 = df6
+for col in df7.columns[4:]:
+ df7[col] = (df7.price - df7[col])/df7[col]
+
+filter = 2
+begindate = '2015-01-01'
+enddate = '2017-01-01'
+df8 = df7[(df7['trade_date']>begindate) & (df7['trade_date']<enddate)]
+difference= pd.DataFrame(df8[df8<filter].mean(), columns=['Average'])
+difference['Std Dev'] = df8[df8<filter].std()
+difference.reset_index()
+
+writer = pd.ExcelWriter('/home/serenitas/edwin/Subprime_ModelandBacktest_outputonly.xlsx')
+df6.to_excel(writer, sheet_name = 'BackTesting', header = True, na_rep = '', startrow= 1, startcol= 1)
+r2.to_excel(writer, sheet_name = 'regression', header = True)
+parameters.to_excel(writer, sheet_name = 'regression', header = True, startrow= 0, startcol= 5)
+difference[2:].to_excel(writer, sheet_name = 'Diff', header = True, startrow= 1, startcol= 1)
+df7.to_excel(writer, sheet_name = 'Diff', header = True, startrow= 15, startcol= 1)
+writer.save()
+
+#fig = plt.plot(figsize=(12,8))
+#fig = sm.graphics.plot_partregress_grid(regmodel,fig=fig)
+
+
+#df6 = pd.DataFrame()
+#for g in df5['mark']:
+# d[g] = (df5['price'] - df5['mark'][g])/df5['mark'][g]
+#df6.from_dict(d)
+