diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/mark_backtest.py | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/python/mark_backtest.py b/python/mark_backtest.py new file mode 100644 index 00000000..970e8b6f --- /dev/null +++ b/python/mark_backtest.py @@ -0,0 +1,78 @@ +import pandas as pd +from db import dbengine +import matplotlib +import numpy as np +import matplotlib.pyplot as plt +import statsmodels.api as sm +from statsmodels.formula.api import gls + +df = (pd.read_sql_table('external_marks_mapped',dbengine('dawndb'), + parse_dates=['date']). + sort_values('date')) +df1 = pd.read_sql_query("select trade_date, cusip as identifier, principal_payment, price from bonds where buysell = 'f'" + ,dbengine('dawndb'), + parse_dates=[ + 'trade_date' + ]) +df1 = df1.sort_values('trade_date') +d = {} +for source, g in df.groupby('source'): + d[source] = pd.merge_asof(df1, g, left_on='trade_date', right_on= 'date', by='identifier') +df2 = pd.concat(d, join='inner') +df3 = df2.dropna(axis=0, subset=['mark']) +df3 = df3.reset_index(level=1, drop=True) +df4 = df3.set_index(['trade_date','identifier', 'source', 'principal_payment']) +df4 = df4.groupby(level=['trade_date', 'identifier','source', 'principal_payment']).mean() +df4 = df4.set_index('price', append=True) +df5 = df4.unstack(-3) +df5.columns = df5.columns.droplevel() +#df6 = df5.reset_index(col_level=1, drop=True) +df6 = df5.sortlevel(0, ascending = False) +df6 = df6.reset_index() + +r2 = {} +stderr = {} +parameters = {} +#for g in df5['mark']: +for col in df6.columns[4:]: + regmodel = gls(formula= "df6['price'] ~df6[col]", data = df6).fit() + r2[col] = regmodel.rsquared + stderr[col] = regmodel.bse + parameters[col] = regmodel.params + +r2 = pd.Series(r2, name= 'R2') +r2.index.name = 'Source' +r2.reset_index +r2 = pd.DataFrame(r2) +stderr = pd.DataFrame(stderr) +parameters = pd.DataFrame(parameters) + +df7 = df6 +for col in df7.columns[4:]: + df7[col] = (df7.price - df7[col])/df7[col] + +filter = 2 +begindate = '2015-01-01' +enddate = '2017-01-01' +df8 = df7[(df7['trade_date']>begindate) & (df7['trade_date']<enddate)] +difference= pd.DataFrame(df8[df8<filter].mean(), columns=['Average']) +difference['Std Dev'] = df8[df8<filter].std() +difference.reset_index() + +writer = pd.ExcelWriter('/home/serenitas/edwin/Subprime_ModelandBacktest_outputonly.xlsx') +df6.to_excel(writer, sheet_name = 'BackTesting', header = True, na_rep = '', startrow= 1, startcol= 1) +r2.to_excel(writer, sheet_name = 'regression', header = True) +parameters.to_excel(writer, sheet_name = 'regression', header = True, startrow= 0, startcol= 5) +difference[2:].to_excel(writer, sheet_name = 'Diff', header = True, startrow= 1, startcol= 1) +df7.to_excel(writer, sheet_name = 'Diff', header = True, startrow= 15, startcol= 1) +writer.save() + +#fig = plt.plot(figsize=(12,8)) +#fig = sm.graphics.plot_partregress_grid(regmodel,fig=fig) + + +#df6 = pd.DataFrame() +#for g in df5['mark']: +# d[g] = (df5['price'] - df5['mark'][g])/df5['mark'][g] +#df6.from_dict(d) + |
