diff options
Diffstat (limited to 'python/marks_to_HY.py')
| -rw-r--r-- | python/marks_to_HY.py | 26 |
1 files changed, 14 insertions, 12 deletions
diff --git a/python/marks_to_HY.py b/python/marks_to_HY.py index ff4e72ea..f20f7711 100644 --- a/python/marks_to_HY.py +++ b/python/marks_to_HY.py @@ -4,6 +4,7 @@ import numpy as np import statsmodels.api as sm from statsmodels.formula.api import ols from pandas.tseries.offsets import MonthEnd +from scipy.interpolate import interp1d import datetime from pandas.tseries.offsets import * import bbg_helpers @@ -12,14 +13,15 @@ import bbg_helpers #import matplotlib #matplotlib.use("Q4Agg") #to prevent conflict with PyQT4 -def monthlySpreadDiff(index="IG", tenor="5yr", period="1yr"): +def monthlySpreadDiff(index="IG", tenor="5yr"): date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date() - sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \ + sql_str = "SELECT date, series, version, closespread AS spread FROM index_quotes " \ "WHERE index=%s and date>=%s and tenor = %s" df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'], index_col=['date', 'series'], params=(index.upper(), date, "5yr")) - df1 = df.unstack(level=1).resample('1m').last().diff(periods = period) - df1 = df1.stack(level = 1).groupby(level = 'date').last() + df1 = df.groupby(level = ['date','series']).last() + df1 = df1.unstack(level = 1).resample('1m').last().diff() + df1 = df1.stack(level = 1).groupby(level = 'date').last()['spread'] return df1 def nearestDate(base, dates): @@ -115,7 +117,6 @@ df1.adjpv= df1.adjpv.where(df1.adjpv.notnull(), df1.pv) df1['spread'] = (df1['adjpv']*100-df1['mark'])/df1['moddur']*100 df1 = df1.set_index(['monthend_date','identifier','source']) -return df1 #difference = difference in spread #Filter @@ -125,7 +126,7 @@ DurationFilter = 1.5 begindate = '2014-01-01' enddate = '2017-01-01' interval = 1 -HYDifference = monthlySpreadDiff(index = 'HY', period = interval) +HYDifference = monthlySpreadDiff(index = 'HY') difference = df1[abs(df1['spread'])<SpreadFilter].groupby(level=['identifier','source'])['pv','moddur','spread'].diff(periods = interval) difference = difference[abs(difference['spread'])<SpreadDiffFilter] difference = difference[abs(difference['moddur'])<DurationFilter] @@ -133,15 +134,15 @@ difference.spread = difference.spread.astype(np.float64) #method 1: for all sources - average through Time avgchange1 = pd.DataFrame(difference.groupby(level=['monthend_date']).mean()['spread']) -avgchange1 = avgchange1.merge(HYDifference, left_index = True, right_index= True) -avgchange1 = avgchange1.rename(columns={'spread_x': 'marks', 'spread_y':'HY'}) +avgchange1 = avgchange1.join(HYDifference, rsuffix = 'HY') avgchange1 = avgchange1[begindate:enddate] -regmodel1 = sm.OLS(avgchange1['marks'],avgchange1['HY']).fit() -ratio1 = (avgchange1['HY']/avgchange1['marks']).mean() +regmodel1 = sm.OLS(avgchange1['spread'],avgchange1['spreadHY']).fit() +ratio1 = (avgchange1['spreadHY']/avgchange1['spread']).mean() #method 2: look at it source by source avgchange2 = pd.DataFrame(difference.groupby(level=['monthend_date','source']).mean()['spread']) avgchange2 = avgchange2.unstack(level =1) +HYDifference = HYDifference.to_frame() HYDifference.columns = pd.MultiIndex.from_arrays([['Index'],['HY']]) avgchange2 = avgchange2.merge(HYDifference, left_index = True, right_index= True) regmodel2 ={} @@ -161,12 +162,13 @@ for source in regmodel2: difference1 = difference.reset_index().set_index('monthend_date') #difference1 = difference1.loc[difference1['source'].isin(['PRICINGDIRECT','PRICESERVE'])] #difference1 = difference1[(difference1['source'] == 'PRICESERVE')] +HYDifference.columns = pd.MultiIndex.from_arrays([('HY',)]) difference1 = difference1.merge(HYDifference, left_index = True, right_index= True) regmodel3 = {} Rsquared3 = {} params3 = {} for source, g in difference1.groupby('source'): - regmodel3[source] = sm.OLS(g['spread'],g[('Index', 'HY')], missing = 'drop').fit() + regmodel3[source] = sm.OLS(g['spread'],g['HY'], missing = 'drop').fit() Rsquared3[source] = regmodel3[source].rsquared params3[source] = regmodel3[source].params #regmodel3.summary() @@ -174,7 +176,7 @@ for source, g in difference1.groupby('source'): #method 4: Let's just look at how spread changes within each source (i.e. see if spread widening ==> lower marks for most/all bonds?) #difference.xs('PRICESERVE', level = 'source', drop_level=False) #difference2[difference2['source']=='BVAL'].to_clipboard() -difference4 = difference1.rename(columns = {('Index', 'HY'): "HY Spread"}) +difference4 = difference1.rename(columns = {('HY'): "HY Spread"}) difference4['HY vs mark'] = difference4['spread'] - difference4['HY Spread'] difference4.index = difference4.index.rename('monthend_date') difference4 = difference4.set_index('source', append = True) |
