diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/externalmarksbackfill.py | 1 | ||||
| -rw-r--r-- | python/mark_backtest_underpar.py | 52 | ||||
| -rw-r--r-- | python/marks_to_HY.py | 105 |
3 files changed, 155 insertions, 3 deletions
diff --git a/python/externalmarksbackfill.py b/python/externalmarksbackfill.py index 7fd6b14c..c0a8bdda 100644 --- a/python/externalmarksbackfill.py +++ b/python/externalmarksbackfill.py @@ -57,6 +57,7 @@ def get_globs(): return chain.from_iterable(globs) settings = { + 'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20161101.20161130.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"), 'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20161001.20161031.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"), 'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20160901.20160930.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"), 'ReviewedPack.bseeman.SERENITAS.SERCGMAST.20160801.20160831.Draft.xlsx': ("JA:JX", "Securities Valuation Details", "Y"), diff --git a/python/mark_backtest_underpar.py b/python/mark_backtest_underpar.py index 863af8c7..28f8f059 100644 --- a/python/mark_backtest_underpar.py +++ b/python/mark_backtest_underpar.py @@ -19,13 +19,59 @@ df_trades = pd.read_sql_query("select trade_date, identifier, principal_payment, parse_dates=['trade_date']) df_trades = df_trades.sort_values('trade_date') df_wide = pd.merge_asof(df_trades, df_wide, left_on='trade_date', right_on='date', by='identifier') -df_long = pd.merge_asof(df_trades, df, left_on='trade_date', right_on='date', by='identifier') +d = {} +for source, g in df.groupby('source'): + d[source] = pd.merge_asof(df_trades, g, left_on='trade_date', right_on='date', by='identifier') +df_long = pd.concat(d, join='inner') +df_long = df_long.reset_index() +df_long = df_long.dropna(subset=['price','mark']) + +#filtering +#filter = 2 +begindate = '2013-01-01' +enddate = '2017-01-01' +df_long = df_long[df_long.identifier.str.len()==9] +df_long = df_long[(df_long['trade_date']>begindate) & (df_long['trade_date']<enddate)] +#difference= df_long[df_long['<filter].groupby('level_0').[.mean(), columns=['Average']) +#difference['Std Dev'] = df_long[df_long<filter].std() #pretty plot -sns.lmplot(x='mark', y='price', hue = 'source', data=df_long, fit_reg=False) +#dotsize = pd.Series.tolist(df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20)) +#df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20) +df_long['dotsize'] = df_long['level_0'].apply(lambda x: 400 if x == 'MANAGER' else 20) +plt.switch_backend('Agg') +sb.set_style("whitegrid") +sb.set_context("notebook") +regplot = sb.lmplot(x='mark', y='price', hue = 'source', data=df_long, fit_reg=False, legend_out=True, aspect = 2.1, size = 4, scatter_kws={"s": df_long['dotsize']}) +regplot.set(ylim=(0, 105), xlim=(0, 105)) +#plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) +regplot.fig.savefig("/home/serenitas/edwin/fig1.png") + +#plt.switch_backend('Qt4Agg') +order = ['MANAGER','BROKER','BVAL','IDC','MARKIT','PB','PRICESERVE','PRICINGDIRECT','REUTERS','S&P'] +sb.set_palette(sb.hls_palette(10, l=.4, s=.8)) +g = sb.FacetGrid(df_long, hue='source', hue_kws={'s':[50] + [20]*9, 'marker': ["o"]+["s"]*9, 'alpha': [1]+[.4]*9}, legend_out=True, aspect = 2.1, size = 4, hue_order = order) +g.set(ylim=(0, 105), xlim=(0, 105)) +ax = g.map(plt.scatter, 'mark', 'price').add_legend() +ax.fig.savefig("/home/serenitas/edwin/fig2.png") #fit all the models at once params = (df_long. groupby('source'). - apply(lambda df: gls('price~mark', df).fit().params) + apply(lambda df: gls('price~mark', df).fit().params)) + + +def f(a, b, c): + return a+b+c + +f(1, 2, 3) + +l=[1, 2, 3] +f(*l) + +def g(a, b, **kwargs): + if 'edwin' in kwargs: + print(kwargs[edwin]) + +g(1, 2, edwin='hello')
\ No newline at end of file diff --git a/python/marks_to_HY.py b/python/marks_to_HY.py new file mode 100644 index 00000000..95e7ac34 --- /dev/null +++ b/python/marks_to_HY.py @@ -0,0 +1,105 @@ +import pandas as pd +from db import dbengine +import numpy as np +import statsmodels.api as sm +from statsmodels.formula.api import gls +from pandas.tseries.offsets import MonthEnd +import datetime +from PyQt4 import QtGui #has conflict with matplotlib + +#import matplotlib +#matplotlib.use("Q4Agg") #to prevent conflict with PyQT4 + +def nearestDate(base, dates): + nearness = { abs(base.timestamp() - date.timestamp()) : date for date in dates } + return nearness[min(nearness.keys())] + +#Df = Read all external Marks +df = (pd.read_sql_table('external_marks_mapped',dbengine('rmbs_model'), + parse_dates=['date']). + sort_values('date')) + +#Ddf1 = reformat dates, get rid of duplicates of external marks +df1 = df +df1['date'] = pd.to_datetime(df['date'], errors= 'coerce') +df1 = df1.sort_values('date') +df1 = df1.set_index(['date','identifier','source']) +df1 = df1.groupby(level=['date','identifier','source'])['mark'].mean() +df1 = df1.reset_index() +df1 = df1.set_index(['date']) + +#df2 = mapping of monthend to the closest available model date +mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",dbengine('dawndb'), parse_dates = ['model_date']).sort_values('model_date') +df2 = df.set_index(['date']) +df2 = df2.groupby(level=['date'])['mark'].mean() +for dt in df2.index: + df2[dt]= nearestDate(dt, mapping['model_date']) +df2 = pd.DataFrame(df2) +df2.columns = ['model_date'] + +#merge the mapped model dates back to the external marks table +df1 = df1.join(df2) + +#Df3 = all the model runs on the available model dates +df3=pd.DataFrame() +sql_str = "select timestamp as date, cusip, moddur from priced where date(timestamp) = %s and model_version = 3" +for date in df2['model_date']: + df3= df3.append(pd.read_sql_query(sql_str,dbengine('dawndb'), parse_dates = ['date'], params=(date, ))) + +df3= df3.rename(columns = {"cusip": "identifier"}) +df3 = df3.sort_values('date') +df3['date'] = df3['date'].dt.date +df3 = df3.set_index(['date', 'identifier']) + +#Now merge external monthend marks with the available model dates +df1 = df1.reset_index() +df1= df1.rename(columns = {"model_date": "date", "date": "monthend_date"}) +df1 = df1.set_index(['date','identifier']) + +df1 = df1.merge(df3, left_index = True, right_index= True) +df1['spread'] = (100-df1['mark'])/df1['moddur'] +df1 = df1.reset_index() +df1 = df1.set_index(['identifier','source','monthend_date']) + +#df5 = difference in spread +difference = df1.groupby(level=['identifier','source']).diff() + +def readtenorSpread1(index="IG", tenor="5yr"): + date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date() + sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \ + "WHERE index=%s and date>=%s and tenor = %s" + df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'], index_col=['date', 'series'], + params=(index.upper(), date, "5yr")) + df1 = df.unstack(level=1).resample('1m').last().diff() + df2 = df1.stack(level = 1).groupby(level = 'date').last() + return df2 + +#failed code +#df1 = df +#df2 = df +#df1 = df1.set_index(['date','identifier','source']) +#df2['prevdate'] = df2['date']- MonthEnd(n=1) +#df2 = df2.rename(columns={'date': 'nextdate', 'prevdate':'date'}) +#df2 = df2.set_index(['date','identifier','source']) +#import pdb; pdb.set_trace() +#df3 = df1.mark - df2.mark + +#now let's get HY spreads +# index = 'IG' +# def readtenorSpreadold(index): +# serenitasdb = dbengine('serenitasdb') +# maxindexnumber = pd.read_sql_query("select max(series) from index_version where index= %s",serenitasdb, params=(index,))['max'][0] +# d={} +# seriesstart = {} +# df = pd.DataFrame() +# for i in range(maxindexnumber-8, maxindexnumber+1): +# #import pdb; pdb.set_trace() +# d[i]= pd.read_sql_query("select date, closeprice, closespread from index_quotes where index= %s and series = %s and tenor = '5yr' order by date desc",serenitasdb, parse_dates = ['date'], params=(index,i)) +# seriesstart[i] = d[i].min(axis=0).date +# seriesstart[maxindexnumber+1]= pd.Timestamp(2030,1,1) +# for i in range(maxindexnumber-8, maxindexnumber+1): +# df = df.append(d[i][(d[i].date>seriesstart[i]) & (d[i].date<seriesstart[i+1])]) +# df = df.sort_values('date') +# df = df.set_index('date') +# df = df.diff() +# return df.dropna() |
