1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
import pandas as pd
from db import dbengine
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import gls
import seaborn as sb
df = pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
parse_dates=['date'])
df = df[df.source.notnull()]
df_wide = (pd.pivot_table(df, 'mark', ['identifier', 'date'], 'source').
reset_index().
sort_values('date'))
df_trades = {}
df_trades['sell'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
"from bonds where buysell = 'f'"
, dbengine('dawndb'),
parse_dates=['trade_date'])
df_trades['buy'] = pd.read_sql_query("select trade_date, identifier, faceamount, principal_payment, price " \
"from bonds where buysell = 't'"
, dbengine('dawndb'),
parse_dates=['trade_date'])
for direction, trades in df_trades.items():
df_trades[direction] = trades.sort_values('trade_date')
df_sell_wide = pd.merge_asof(df_trades['sell'], df_wide, left_on='trade_date', right_on='date', by='identifier')
df_wide_temp = df_wide.set_index('date').shift(periods = -1, freq= 'M', axis = 1).reset_index()
df_buy_wide = pd.merge_asof(df_trades['buy'], df_wide_temp
, left_on='trade_date', right_on='date', by='identifier')
d_sell = {}
d_buy = {}
df_long = {}
for source, g in df.groupby('source'):
d_sell[source] = pd.merge_asof(df_trades['sell'], g, left_on='trade_date', right_on='date', by='identifier')
d_sell[source]['dir'] = 'sell'
d_buy[source] = pd.merge_asof(df_trades['buy'], g.set_index('date').shift(periods = -1, freq= 'BM', axis = 1).reset_index(), left_on='trade_date', right_on='date', by='identifier')
d_buy[source]['dir'] = 'buy'
#add a column saying it is buy or sell
df_long = pd.concat(d_sell, join='inner')
df_long = df_long.append(pd.concat(d_buy, join='inner'))
df_long = df_long.reset_index().dropna(subset=['price','mark'])
df_long['difference'] = (df_long['price'] - df_long['mark'])/df_long['mark']
#filtering
diff_threshold = 5
begindate = '2013-01-01'
enddate = '2018-01-01'
df_long = df_long[df_long.identifier.str.len() == 9]
df_long = df_long[(df_long['trade_date'] > begindate) & (df_long['trade_date'] < enddate)]
#difference['Std Dev'] = df_long[df_long<filter].std()
#pretty plot
#df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)
plt.switch_backend('Agg')
sb.set_style("whitegrid")
sb.set_context("notebook")
#plt.switch_backend('Qt4Agg')
order = ['MANAGER','BROKER','BVAL','IDC','MARKIT','PB','PRICESERVE','PRICINGDIRECT','REUTERS','S&P']
sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
g_sell = sb.FacetGrid(df_long[df_long.dir == 'sell'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
g_sell.set(ylim=(0, 105), xlim=(0, 105))
ax_sell = g_sell.map(plt.scatter, 'mark', 'price').add_legend()
g_buy = sb.FacetGrid(df_long[df_long.dir == 'buy'], hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
g_buy.set(ylim=(0, 105), xlim=(0, 105))
ax_buy = g_buy.map(plt.scatter, 'mark', 'price').add_legend()
ax_sell.fig.savefig("/home/serenitas/edwin/backtest_sales.png")
ax_buy.fig.savefig("/home/serenitas/edwin/backtest_buys.png")
params = {}
error = {}
trade_PNL = {}
trade_PNL_stat = {}
#fit all the models at once
for direction, df_temp in df_long[(df_long.difference < diff_threshold)].groupby('dir'):
params[direction] = (df_temp.
groupby('source').
apply(lambda df: gls('price~mark', df).fit().params))
error[direction] = [df_temp.groupby('source').mean()['difference'],
df_temp.groupby('source').std()['difference']]
trade_PNL[direction] = df_temp.groupby(['date', 'source']).apply(
lambda df: sum(df.principal_payment * df.difference)/
sum(df.principal_payment))
trade_PNL_stat[direction] = [trade_PNL[direction].groupby(level = 'source').mean(),
trade_PNL[direction].groupby(level = 'source').std()]
buychart = trade_PNL['buy'].unstack(-1).plot()
|