1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
import pandas as pd
from db import dbengine
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import gls
import seaborn as sb
df = pd.read_sql_table('external_marks_mapped',dbengine('dawndb'),
parse_dates=['date'])
df = df[df.source.notnull()]
df_wide = (pd.pivot_table(df, 'mark', ['identifier', 'date'], 'source').
reset_index().
sort_values('date'))
df_trades = pd.read_sql_query("select trade_date, identifier, principal_payment, price " \
"from bonds where buysell = 'f'"
,dbengine('dawndb'),
parse_dates=['trade_date'])
df_trades = df_trades.sort_values('trade_date')
df_wide = pd.merge_asof(df_trades, df_wide, left_on='trade_date', right_on='date', by='identifier')
d = {}
for source, g in df.groupby('source'):
d[source] = pd.merge_asof(df_trades, g, left_on='trade_date', right_on='date', by='identifier')
df_long = pd.concat(d, join='inner')
df_long = df_long.reset_index()
df_long = df_long.dropna(subset=['price','mark'])
#filtering
#filter = 2
begindate = '2013-01-01'
enddate = '2017-01-01'
df_long = df_long[df_long.identifier.str.len()==9]
df_long = df_long[(df_long['trade_date']>begindate) & (df_long['trade_date']<enddate)]
#difference= df_long[df_long['<filter].groupby('level_0').[.mean(), columns=['Average'])
#difference['Std Dev'] = df_long[df_long<filter].std()
#pretty plot
#dotsize = pd.Series.tolist(df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20))
#df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)
df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)
plt.switch_backend('Agg')
sb.set_style("whitegrid")
sb.set_context("notebook")
regplot = sb.lmplot(x='mark', y='price', hue = 'source', data=df_long, fit_reg=False, legend_out=True, aspect = 2.1, size = 4, scatter_kws={"s": df_long['dotsize']})
regplot.set(ylim=(0, 105), xlim=(0, 105))
#plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
regplot.fig.savefig("/home/serenitas/edwin/fig1.png")
#plt.switch_backend('Qt4Agg')
order = ['MANAGER','BROKER','BVAL','IDC','MARKIT','PB','PRICESERVE','PRICINGDIRECT','REUTERS','S&P']
sb.set_palette(sb.hls_palette(10, l=.4, s=.8))
g = sb.FacetGrid(df_long, hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order)
g.set(ylim=(0, 105), xlim=(0, 105))
ax = g.map(plt.scatter, 'mark', 'price').add_legend()
ax.fig.savefig("/home/serenitas/edwin/fig2.png")
#fit all the models at once
params = (df_long.
groupby('source').
apply(lambda df: gls('price~mark', df).fit().params))
|