 python/marks_to_HY.py | 166 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 146 insertions(+), 20 deletions(-)
diff --git a/python/marks_to_HY.py b/python/marks_to_HY.py
index 95e7ac34..ff4e72ea 100644
--- a/python/marks_to_HY.py
+++ b/python/marks_to_HY.py
@@ -2,23 +2,64 @@ import pandas as pd
from db import dbengine
import numpy as np
import statsmodels.api as sm
-from statsmodels.formula.api import gls
+from statsmodels.formula.api import ols
from pandas.tseries.offsets import MonthEnd
import datetime
-from PyQt4 import QtGui #has conflict with matplotlib
+from pandas.tseries.offsets import *
+import bbg_helpers
+from scipy.interpolate import interp1d
+#from PyQt4 import QtGui #has conflict with matplotlib
#import matplotlib
#matplotlib.use("Q4Agg") #to prevent conflict with PyQT4
+def monthlySpreadDiff(index="IG", tenor="5yr", period=1):
+    #change in index closing spread over `period` months
+    date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
+    sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \
+              "WHERE index=%s and date>=%s and tenor = %s"
+    df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'],
+                           index_col=['date', 'series'],
+                           params=(index.upper(), date, tenor))
+    df1 = df.unstack(level=1).resample('1m').last().diff(periods=period)
+    df1 = df1.stack(level=1).groupby(level='date').last()
+    return df1
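+# Usage sketch (hedged): monthlySpreadDiff(index='HY', period=1) returns a
+# one-column 'spread' frame of month-over-month spread changes indexed by
+# month-end date, keeping the latest series with data in each month.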
+
def nearestDate(base, dates):
nearness = { abs(base.timestamp() - date.timestamp()) : date for date in dates }
return nearness[min(nearness.keys())]
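+# Example: nearestDate(pd.Timestamp('2016-12-31'), candidate_dates) returns
+# the candidate whose timestamp has the smallest absolute gap to the base.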
+def interpolate_rates(s, hist_data):
+    #hist_data is indexed by date with columns [PX_LAST, tenor]
+    date, dur = s.BDay, s.moddur
+    v = hist_data.loc[date].values
+    return interp1d(v[:, 1], v[:, 0], fill_value='extrapolate')(dur)
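+# Sketch (hedged), mirroring the commented-out call further down:
+#   row = pd.Series({'BDay': pd.Timestamp('2016-12-20'), 'moddur': 3.4})
+#   interpolate_rates(row, hist_data1)
+# linearly interpolates between the 3y and 4y swap rates on that date.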
+
+def aux(df):
+    #delta_ir is taken to be the pv change for a 40bp shock, so delta_ir/40
+    #is the per-bp sensitivity and yielddiff is in bp
+    df['adjpv'] = (df.pv + df.delta_ir*df.yielddiff/40)
+    return df
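+# Worked example under that assumption: pv=1.02, delta_ir=-0.004,
+# yielddiff=+20 gives adjpv = 1.02 + (-0.004)*20/40 = 1.018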
+
+
#df = read all external marks
-df = (pd.read_sql_table('external_marks_mapped',dbengine('rmbs_model'),
+df = (pd.read_sql_table('external_marks_mapped',dbengine('dawndb'),
parse_dates=['date']).
sort_values('date'))
+BBG_IP = ['192.168.9.65']
+#BBG_IP = ['192.168.9.61', '192.168.9.65', '192.168.0.10', '192.168.0.12']
+bbgstartdate = datetime.datetime(2013, 1, 1)
+hist_securities = ["US0006M Index", "USSWAP2 Curncy", "USSWAP3 Curncy",
+                   "USSWAP4 Curncy", "USSWAP5 Curncy", "USSWAP7 Curncy",
+                   "USSWAP10 Curncy"]
+hist_fields = ['PX_LAST']
+
+with bbg_helpers.init_bbg_session(BBG_IP) as session:
+ hist_data = bbg_helpers.retrieve_data(session, hist_securities, hist_fields, start_date=bbgstartdate)
+
+hist_data1 = pd.concat(hist_data, ignore_index=False,
+                       names=['Rate Index', 'numIndex']).reset_index()
+mapping = {"US0006M Index": .5, "USSWAP2 Curncy": 2, "USSWAP3 Curncy": 3,
+           "USSWAP4 Curncy": 4, "USSWAP5 Curncy": 5, "USSWAP7 Curncy": 7,
+           "USSWAP10 Curncy": 10}
+mapping = pd.Series(mapping, name='tenor')
+mapping.index.name = 'Rate Index'
+mapping = mapping.reset_index()
+hist_data1 = hist_data1.merge(mapping, on='Rate Index')
+hist_data1 = hist_data1[['date', 'PX_LAST', 'tenor']].set_index('date')
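+# hist_data1 now holds one row per (date, curve point), indexed by date:
+#   date        PX_LAST  tenor
+#   2013-01-02  <rate>   0.5
+#   2013-01-02  <rate>   2.0
+#   ...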
+#rate = interpolateRates(hist_data1, '2016-12-20', 3.4)
+
#df1 = reformat dates, get rid of duplicates of external marks
df1 = df
df1['date'] = pd.to_datetime(df['date'], errors= 'coerce')
@@ -29,7 +70,7 @@ df1 = df1.reset_index()
df1 = df1.set_index(['date'])
#df2 = mapping of monthend to the closest available model date
-mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",dbengine('dawndb'), parse_dates = ['model_date']).sort_values('model_date')
+mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",dbengine('rmbs_model'), parse_dates = ['model_date']).sort_values('model_date')
df2 = df.set_index(['date'])
df2 = df2.groupby(level=['date'])['mark'].mean()
for dt in df2.index:
@@ -42,9 +83,11 @@ df1 = df1.join(df2)
#Df3 = all the model runs on the available model dates
df3=pd.DataFrame()
-sql_str = "select timestamp as date, cusip, moddur from priced where date(timestamp) = %s and model_version = 3"
+sql_str = "select timestamp as date, pv, cusip, moddur, delta_ir from priced " \
+ "where date(timestamp) = %s and model_version = 1"
for date in df2['model_date']:
- df3= df3.append(pd.read_sql_query(sql_str,dbengine('dawndb'), parse_dates = ['date'], params=(date, )))
+    #Note: .to_pydatetime() converts the Timestamp into a datetime MySQL can bind; not required for Postgres
+    df3 = df3.append(pd.read_sql_query(sql_str, dbengine('rmbs_model'), parse_dates=['date'],
+                                       params=(date.to_pydatetime(),)))
df3= df3.rename(columns = {"cusip": "identifier"})
df3 = df3.sort_values('date')
@@ -57,22 +100,86 @@ df1= df1.rename(columns = {"model_date": "date", "date": "monthend_date"})
df1 = df1.set_index(['date','identifier'])
df1 = df1.merge(df3, left_index = True, right_index= True)
-df1['spread'] = (100-df1['mark'])/df1['moddur']
-df1 = df1.reset_index()
-df1 = df1.set_index(['identifier','source','monthend_date'])
+df1 = df1.reset_index().sort_values('monthend_date')
+availratesdate = pd.DataFrame(sorted(pd.unique(hist_data1.index)), columns=['date'])
+df1['BDay'] = pd.merge_asof(df1, availratesdate, left_on='monthend_date',
+                            right_on='date', allow_exact_matches=True)['date_y']
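+# merge_asof (default direction='backward') maps each monthend_date to the
+# latest available rates date at or before it, e.g. a Saturday month-end
+# falls back to the preceding Friday's curve.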
-#df5 = difference in spread
-difference = df1.groupby(level=['identifier','source']).diff()
+df1temp = df1.groupby(['identifier', 'BDay', 'moddur']).mean().reset_index()
+df1temp['yield'] = df1temp.apply(interpolate_rates, axis=1, args=(hist_data1,))
+df1temp['yielddiff'] = df1temp.groupby(['identifier'])['yield'].diff()*100  #percentage points -> bp
+df1 = pd.merge(df1, df1temp[['identifier', 'BDay', 'moddur', 'yielddiff']],
+               on=['identifier', 'BDay', 'moddur'])
-def readtenorSpread1(index="IG", tenor="5yr"):
- date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
- sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \
- "WHERE index=%s and date>=%s and tenor = %s"
- df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'], index_col=['date', 'series'],
- params=(index.upper(), date, "5yr"))
- df1 = df.unstack(level=1).resample('1m').last().diff()
- df2 = df1.stack(level = 1).groupby(level = 'date').last()
- return df2
+#done in a roundabout way as a note: the groupby is only needed if a diff is taken along the way
+df1 = df1.groupby('identifier').apply(aux)
+df1.adjpv = df1.adjpv.where(df1.adjpv.notnull(), df1.pv)
+df1['spread'] = (df1['adjpv']*100 - df1['mark'])/df1['moddur']*100
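+# Worked example (units hedged, presumably bp): adjpv=1.018 (price 101.8),
+# mark=100.5, moddur=4 gives spread = (101.8 - 100.5)/4*100 = 32.5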
+
+df1 = df1.set_index(['monthend_date', 'identifier', 'source'])
+
+#difference = difference in spread
+#Filter
+SpreadFilter = 1000
+SpreadDiffFilter = 250
+DurationFilter = 1.5
+begindate = '2014-01-01'
+enddate = '2017-01-01'
+interval = 1
+HYDifference = monthlySpreadDiff(index = 'HY', period = interval)
+difference = (df1[abs(df1['spread']) < SpreadFilter]
+              .groupby(level=['identifier', 'source'])[['pv', 'moddur', 'spread']]
+              .diff(periods=interval))
+difference = difference[abs(difference['spread']) < SpreadDiffFilter]
+difference = difference[abs(difference['moddur']) < DurationFilter]
+difference.spread = difference.spread.astype(np.float64)
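+# Hedged reading of the filters: spread levels >= 1000bp, one-period spread
+# moves >= 250bp, or duration jumps >= 1.5y are treated as stale or erroneous
+# marks and dropped before averaging.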
+
+#method 1: for all sources - average through Time
+avgchange1 = pd.DataFrame(difference.groupby(level=['monthend_date']).mean()['spread'])
+avgchange1 = avgchange1.merge(HYDifference, left_index = True, right_index= True)
+avgchange1 = avgchange1.rename(columns={'spread_x': 'marks', 'spread_y':'HY'})
+avgchange1 = avgchange1[begindate:enddate]
+regmodel1 = sm.OLS(avgchange1['marks'], avgchange1['HY'], missing='drop').fit()
+ratio1 = (avgchange1['HY']/avgchange1['marks']).mean()  #note: inverse convention vs ratio2 (HY per unit of marks)
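+# Note: sm.OLS without a constant regresses through the origin; to allow an
+# intercept one could fit sm.OLS(y, sm.add_constant(x)) instead -- kept
+# origin-constrained here to match methods 2 and 3.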
+
+#method 2: look at it source by source
+avgchange2 = pd.DataFrame(difference.groupby(level=['monthend_date','source']).mean()['spread'])
+avgchange2 = avgchange2.unstack(level =1)
+HYDifference.columns = pd.MultiIndex.from_arrays([['Index'],['HY']])
+avgchange2 = avgchange2.merge(HYDifference, left_index = True, right_index= True)
+regmodel2 ={}
+ratio2 = {}
+for source in avgchange2['spread']:
+ regmodel2[source] = sm.OLS(avgchange2['spread'][source],avgchange2['Index']['HY'], missing= 'drop').fit()
+ ratio2[source] = (avgchange2['spread'][source]/avgchange2['Index']['HY']).mean()
+
+Rsquared2 = {}
+params2 = {}
+for source in regmodel2:
+ Rsquared2[source] = regmodel2[source].rsquared
+ params2[source] = regmodel2[source].params
+
+#method 3: regress mark spread change (y) on HY change (x), pooled across time
+#optional filters below restrict to just PRICESERVE / PRICINGDIRECT
+difference1 = difference.reset_index().set_index('monthend_date')
+#difference1 = difference1.loc[difference1['source'].isin(['PRICINGDIRECT','PRICESERVE'])]
+#difference1 = difference1[(difference1['source'] == 'PRICESERVE')]
+difference1 = difference1.merge(HYDifference, left_index = True, right_index= True)
+regmodel3 = {}
+Rsquared3 = {}
+params3 = {}
+for source, g in difference1.groupby('source'):
+ regmodel3[source] = sm.OLS(g['spread'],g[('Index', 'HY')], missing = 'drop').fit()
+ Rsquared3[source] = regmodel3[source].rsquared
+ params3[source] = regmodel3[source].params
+#regmodel3[source].summary()
+
+#method 4: Let's just look at how spread changes within each source (i.e. see if spread widening ==> lower marks for most/all bonds?)
+#difference.xs('PRICESERVE', level = 'source', drop_level=False)
+#difference2[difference2['source']=='BVAL'].to_clipboard()
+difference4 = difference1.rename(columns = {('Index', 'HY'): "HY Spread"})
+difference4['HY vs mark'] = difference4['spread'] - difference4['HY Spread']
+difference4.index = difference4.index.rename('monthend_date')
+difference4 = difference4.set_index('source', append = True)
+meanmarkchange = difference4.groupby(level=['monthend_date', 'source'])['spread'].mean().unstack(level=-1)
+StdDevmarkchange = difference4.groupby(level=['monthend_date', 'source'])['spread'].std().unstack(level=-1)
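+# Interpretation sketch: if spread widening lowers marks broadly, months where
+# the HY index widens should show same-signed mean changes across sources with
+# a comparatively small cross-bond StdDev.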
#failed code
#df1 = df
@@ -103,3 +210,22 @@ def readtenorSpread1(index="IG", tenor="5yr"):
# df = df.set_index('date')
# df = df.diff()
# return df.dropna()
+
+
+# def interpolateRates(df, date, duration):
+#     set1 = False
+#     set2 = False
+#     #import pdb; pdb.set_trace()
+#     for tenor in sorted(df[date]['tenor']):
+#         if duration < tenor and set1 == False:
+#             lowertenor = tenor
+#             lowerrate = df[date][df[date]['tenor'].isin([tenor])]['PX_LAST']
+#             set1 = True
+#         elif duration < tenor and set2 == False and set1 == True:
+#             uppertenor = tenor
+#             upperrate = df[date][df[date]['tenor'].isin([tenor])]['PX_LAST']
+#             set2 = True
+#             rate = (upperrate[0]-lowerrate[0])/(uppertenor-lowertenor)*(duration - lowertenor)+lowerrate[0]
+#         else:
+#             rate = df[date][df[date]['tenor'].isin([tenor])]['PX_LAST'][0]
+#     return rate
\ No newline at end of file