aboutsummaryrefslogtreecommitdiffstats
path: root/python/marks_to_HY.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/marks_to_HY.py')
-rw-r--r--python/marks_to_HY.py26
1 files changed, 14 insertions, 12 deletions
diff --git a/python/marks_to_HY.py b/python/marks_to_HY.py
index ff4e72ea..f20f7711 100644
--- a/python/marks_to_HY.py
+++ b/python/marks_to_HY.py
@@ -4,6 +4,7 @@ import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
from pandas.tseries.offsets import MonthEnd
+from scipy.interpolate import interp1d
import datetime
from pandas.tseries.offsets import *
import bbg_helpers
@@ -12,14 +13,15 @@ import bbg_helpers
#import matplotlib
#matplotlib.use("Q4Agg") #to prevent conflict with PyQT4
-def monthlySpreadDiff(index="IG", tenor="5yr", period="1yr"):
+def monthlySpreadDiff(index="IG", tenor="5yr"):
date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
- sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \
+ sql_str = "SELECT date, series, version, closespread AS spread FROM index_quotes " \
"WHERE index=%s and date>=%s and tenor = %s"
df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'], index_col=['date', 'series'],
params=(index.upper(), date, "5yr"))
- df1 = df.unstack(level=1).resample('1m').last().diff(periods = period)
- df1 = df1.stack(level = 1).groupby(level = 'date').last()
+ df1 = df.groupby(level = ['date','series']).last()
+ df1 = df1.unstack(level = 1).resample('1m').last().diff()
+ df1 = df1.stack(level = 1).groupby(level = 'date').last()['spread']
return df1
def nearestDate(base, dates):
@@ -115,7 +117,6 @@ df1.adjpv= df1.adjpv.where(df1.adjpv.notnull(), df1.pv)
df1['spread'] = (df1['adjpv']*100-df1['mark'])/df1['moddur']*100
df1 = df1.set_index(['monthend_date','identifier','source'])
-return df1
#difference = difference in spread
#Filter
@@ -125,7 +126,7 @@ DurationFilter = 1.5
begindate = '2014-01-01'
enddate = '2017-01-01'
interval = 1
-HYDifference = monthlySpreadDiff(index = 'HY', period = interval)
+HYDifference = monthlySpreadDiff(index = 'HY')
difference = df1[abs(df1['spread'])<SpreadFilter].groupby(level=['identifier','source'])['pv','moddur','spread'].diff(periods = interval)
difference = difference[abs(difference['spread'])<SpreadDiffFilter]
difference = difference[abs(difference['moddur'])<DurationFilter]
@@ -133,15 +134,15 @@ difference.spread = difference.spread.astype(np.float64)
#method 1: for all sources - average through Time
avgchange1 = pd.DataFrame(difference.groupby(level=['monthend_date']).mean()['spread'])
-avgchange1 = avgchange1.merge(HYDifference, left_index = True, right_index= True)
-avgchange1 = avgchange1.rename(columns={'spread_x': 'marks', 'spread_y':'HY'})
+avgchange1 = avgchange1.join(HYDifference, rsuffix = 'HY')
avgchange1 = avgchange1[begindate:enddate]
-regmodel1 = sm.OLS(avgchange1['marks'],avgchange1['HY']).fit()
-ratio1 = (avgchange1['HY']/avgchange1['marks']).mean()
+regmodel1 = sm.OLS(avgchange1['spread'],avgchange1['spreadHY']).fit()
+ratio1 = (avgchange1['spreadHY']/avgchange1['spread']).mean()
#method 2: look at it source by source
avgchange2 = pd.DataFrame(difference.groupby(level=['monthend_date','source']).mean()['spread'])
avgchange2 = avgchange2.unstack(level =1)
+HYDifference = HYDifference.to_frame()
HYDifference.columns = pd.MultiIndex.from_arrays([['Index'],['HY']])
avgchange2 = avgchange2.merge(HYDifference, left_index = True, right_index= True)
regmodel2 ={}
@@ -161,12 +162,13 @@ for source in regmodel2:
difference1 = difference.reset_index().set_index('monthend_date')
#difference1 = difference1.loc[difference1['source'].isin(['PRICINGDIRECT','PRICESERVE'])]
#difference1 = difference1[(difference1['source'] == 'PRICESERVE')]
+HYDifference.columns = pd.MultiIndex.from_arrays([('HY',)])
difference1 = difference1.merge(HYDifference, left_index = True, right_index= True)
regmodel3 = {}
Rsquared3 = {}
params3 = {}
for source, g in difference1.groupby('source'):
- regmodel3[source] = sm.OLS(g['spread'],g[('Index', 'HY')], missing = 'drop').fit()
+ regmodel3[source] = sm.OLS(g['spread'],g['HY'], missing = 'drop').fit()
Rsquared3[source] = regmodel3[source].rsquared
params3[source] = regmodel3[source].params
#regmodel3.summary()
@@ -174,7 +176,7 @@ for source, g in difference1.groupby('source'):
#method 4: Let's just look at how spread changes within each source (i.e. see if spread widening ==> lower marks for most/all bonds?)
#difference.xs('PRICESERVE', level = 'source', drop_level=False)
#difference2[difference2['source']=='BVAL'].to_clipboard()
-difference4 = difference1.rename(columns = {('Index', 'HY'): "HY Spread"})
+difference4 = difference1.rename(columns = {('HY'): "HY Spread"})
difference4['HY vs mark'] = difference4['spread'] - difference4['HY Spread']
difference4.index = difference4.index.rename('monthend_date')
difference4 = difference4.set_index('source', append = True)