-rw-r--r--   python/marks_to_HY.py | 166
1 file changed, 146 insertions(+), 20 deletions(-)
diff --git a/python/marks_to_HY.py b/python/marks_to_HY.py
index 95e7ac34..ff4e72ea 100644
--- a/python/marks_to_HY.py
+++ b/python/marks_to_HY.py
@@ -2,23 +2,64 @@ import pandas as pd
 from db import dbengine
 import numpy as np
 import statsmodels.api as sm
-from statsmodels.formula.api import gls
+from statsmodels.formula.api import ols
 from pandas.tseries.offsets import MonthEnd
 import datetime
-from PyQt4 import QtGui #has conflict with matplotlib
+from pandas.tseries.offsets import *
+import bbg_helpers
+#from PyQt4 import QtGui #has conflict with matplotlib
 #import matplotlib
 #matplotlib.use("Q4Agg") #to prevent conflict with PyQT4
 
+def monthlySpreadDiff(index="IG", tenor="5yr", period="1yr"):
+    date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
+    sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \
+              "WHERE index=%s and date>=%s and tenor = %s"
+    df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'], index_col=['date', 'series'],
+                           params=(index.upper(), date, "5yr"))
+    df1 = df.unstack(level=1).resample('1m').last().diff(periods = period)
+    df1 = df1.stack(level = 1).groupby(level = 'date').last()
+    return df1
+
 def nearestDate(base, dates):
     nearness = { abs(base.timestamp() - date.timestamp()) : date for date in dates }
     return nearness[min(nearness.keys())]
 
+def interpolate_rates(s, hist_data):
+    date, rate = s.BDay, s.moddur
+    v = hist_data.loc[date].values
+    return interp1d(v[:,1], v[:,0], fill_value='extrapolate')(rate)
+
+def aux(df):
+    #assume shock to be 40
+    df['adjpv'] = (df.pv+df.delta_ir*df.yielddiff/40)
+    return df
+
+
 #Df = Read all external Marks
-df = (pd.read_sql_table('external_marks_mapped',dbengine('rmbs_model'),
+df = (pd.read_sql_table('external_marks_mapped',dbengine('dawndb'),
                         parse_dates=['date']).
       sort_values('date'))
+BBG_IP = ['192.168.9.65']
+#BBG_IP = ['192.168.9.61', '192.168.9.65', '192.168.0.10', '192.168.0.12']
+bbgstartdate = pd.datetime(2013, 1, 1)
+hist_securities = ["US0006M Index", "USSWAP2 Curncy", "USSWAP3 Curncy", "USSWAP4 Curncy", "USSWAP5 Curncy", "USSWAP7 Curncy", "USSWAP10 Curncy"]
+hist_fields = ['PX_LAST']
+
+with bbg_helpers.init_bbg_session(BBG_IP) as session:
+    hist_data = bbg_helpers.retrieve_data(session, hist_securities, hist_fields, start_date=bbgstartdate)
+
+hist_data1 = pd.concat(hist_data,ignore_index=False, names = ['Rate Index', 'numIndex']).reset_index()
+mapping = {"US0006M Index": .5, "USSWAP10 Curncy": 10, "USSWAP2 Curncy": 2, "USSWAP3 Curncy": 3, "USSWAP4 Curncy": 4, "USSWAP5 Curncy": 5, "USSWAP7 Curncy": 7}
+mapping = pd.Series(mapping, name = 'tenor')
+mapping.index.name = 'Rate Index'
+mapping = mapping.reset_index()
+hist_data1 = hist_data1.merge(mapping, on='Rate Index')
+hist_data1 = hist_data1[['date','PX_LAST','tenor']].set_index('date')
+#rate = interpolateRates(hist_data1, '2016-12-20', 3.4)
+
 #Ddf1 = reformat dates, get rid of duplicates of external marks
 df1 = df
 df1['date'] = pd.to_datetime(df['date'], errors= 'coerce')
@@ -29,7 +70,7 @@ df1 = df1.reset_index()
 df1 = df1.set_index(['date'])
 
 #df2 = mapping of monthend to the closest available model date
-mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",dbengine('dawndb'), parse_dates = ['model_date']).sort_values('model_date')
+mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",dbengine('rmbs_model'), parse_dates = ['model_date']).sort_values('model_date')
 df2 = df.set_index(['date'])
 df2 = df2.groupby(level=['date'])['mark'].mean()
 for dt in df2.index:
@@ -42,9 +83,11 @@ df1 = df1.join(df2)
 
 #Df3 = all the model runs on the available model dates
 df3=pd.DataFrame()
-sql_str = "select timestamp as date, cusip, moddur from priced where date(timestamp) = %s and model_version = 3"
+sql_str = "select timestamp as date, pv, cusip, moddur, delta_ir from priced " \
+          "where date(timestamp) = %s and model_version = 1"
 for date in df2['model_date']:
-    df3= df3.append(pd.read_sql_query(sql_str,dbengine('dawndb'), parse_dates = ['date'], params=(date, )))
+    #Note: Need to add .to_pydatetime() to be converted into a datetime passable to MYSQL. Not required for Postgres
+    df3= df3.append(pd.read_sql_query(sql_str,dbengine('rmbs_model'), parse_dates = ['date'], params=(date.to_pydatetime(), )))
 df3= df3.rename(columns = {"cusip": "identifier"})
 df3 = df3.sort_values('date')
 
@@ -57,22 +100,86 @@ df1= df1.rename(columns = {"model_date": "date", "date": "monthend_date"})
 df1 = df1.set_index(['date','identifier'])
 df1 = df1.merge(df3, left_index = True, right_index= True)
 
-df1['spread'] = (100-df1['mark'])/df1['moddur']
-df1 = df1.reset_index()
-df1 = df1.set_index(['identifier','source','monthend_date'])
+df1 = df1.reset_index().sort_values('monthend_date')
+availratesdate = pd.DataFrame(sorted(pd.unique(hist_data1.index)), columns=['date'])
+df1['BDay'] = pd.merge_asof(df1, availratesdate, left_on = 'monthend_date', right_on = 'date', allow_exact_matches=True)['date_y']
 
-#df5 = difference in spread
-difference = df1.groupby(level=['identifier','source']).diff()
+df1temp = df1.groupby(['identifier','BDay','moddur']).mean().reset_index()
+df1temp['yield'] = df1temp.apply(interpolate_rates, 1, args=(hist_data1,))
+df1temp['yielddiff'] = df1temp.groupby(['identifier'])['yield'].diff()*100
+df1 = pd.merge(df1, df1temp[['identifier', 'BDay', 'moddur','yielddiff']], on= ['identifier', 'BDay', 'moddur'])
 
-def readtenorSpread1(index="IG", tenor="5yr"):
-    date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
-    sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \
-              "WHERE index=%s and date>=%s and tenor = %s"
-    df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'], index_col=['date', 'series'],
-                           params=(index.upper(), date, "5yr"))
-    df1 = df.unstack(level=1).resample('1m').last().diff()
-    df2 = df1.stack(level = 1).groupby(level = 'date').last()
-    return df2
+#doing in roundabout way just as notes. no need to groupby unless we are doing a diff along the way
+df1 = df1.groupby('identifier').apply(aux)
+df1.adjpv= df1.adjpv.where(df1.adjpv.notnull(), df1.pv)
+df1['spread'] = (df1['adjpv']*100-df1['mark'])/df1['moddur']*100
+
+df1 = df1.set_index(['monthend_date','identifier','source'])
+return df1
+
+#difference = difference in spread
+#Filter
+SpreadFilter = 1000
+SpreadDiffFilter = 250
+DurationFilter = 1.5
+begindate = '2014-01-01'
+enddate = '2017-01-01'
+interval = 1
+HYDifference = monthlySpreadDiff(index = 'HY', period = interval)
+difference = df1[abs(df1['spread'])<SpreadFilter].groupby(level=['identifier','source'])['pv','moddur','spread'].diff(periods = interval)
+difference = difference[abs(difference['spread'])<SpreadDiffFilter]
+difference = difference[abs(difference['moddur'])<DurationFilter]
+difference.spread = difference.spread.astype(np.float64)
+
+#method 1: for all sources - average through Time
+avgchange1 = pd.DataFrame(difference.groupby(level=['monthend_date']).mean()['spread'])
+avgchange1 = avgchange1.merge(HYDifference, left_index = True, right_index= True)
+avgchange1 = avgchange1.rename(columns={'spread_x': 'marks', 'spread_y':'HY'})
+avgchange1 = avgchange1[begindate:enddate]
+regmodel1 = sm.OLS(avgchange1['marks'],avgchange1['HY']).fit()
+ratio1 = (avgchange1['HY']/avgchange1['marks']).mean()
+
+#method 2: look at it source by source
+avgchange2 = pd.DataFrame(difference.groupby(level=['monthend_date','source']).mean()['spread'])
+avgchange2 = avgchange2.unstack(level =1)
+HYDifference.columns = pd.MultiIndex.from_arrays([['Index'],['HY']])
+avgchange2 = avgchange2.merge(HYDifference, left_index = True, right_index= True)
+regmodel2 ={}
+ratio2 = {}
+for source in avgchange2['spread']:
+    regmodel2[source] = sm.OLS(avgchange2['spread'][source],avgchange2['Index']['HY'], missing= 'drop').fit()
+    ratio2[source] = (avgchange2['spread'][source]/avgchange2['Index']['HY']).mean()
+
+Rsquared2 = {}
+params2 = {}
+for source in regmodel2:
+    Rsquared2[source] = regmodel2[source].rsquared
+    params2[source] = regmodel2[source].params
+
+#method 3: just regress mark spread change (x) to HY change (y) regardless of Time
+#has filter to do JUST priceserve and pricingdirect
+difference1 = difference.reset_index().set_index('monthend_date')
+#difference1 = difference1.loc[difference1['source'].isin(['PRICINGDIRECT','PRICESERVE'])]
+#difference1 = difference1[(difference1['source'] == 'PRICESERVE')]
+difference1 = difference1.merge(HYDifference, left_index = True, right_index= True)
+regmodel3 = {}
+Rsquared3 = {}
+params3 = {}
+for source, g in difference1.groupby('source'):
+    regmodel3[source] = sm.OLS(g['spread'],g[('Index', 'HY')], missing = 'drop').fit()
+    Rsquared3[source] = regmodel3[source].rsquared
+    params3[source] = regmodel3[source].params
+#regmodel3.summary()
+
+#method 4: Let's just look at how spread changes within each source (i.e. see if spread widening ==> lower marks for most/all bonds?)
+#difference.xs('PRICESERVE', level = 'source', drop_level=False)
+#difference2[difference2['source']=='BVAL'].to_clipboard()
+difference4 = difference1.rename(columns = {('Index', 'HY'): "HY Spread"})
+difference4['HY vs mark'] = difference4['spread'] - difference4['HY Spread']
+difference4.index = difference4.index.rename('monthend_date')
+difference4 = difference4.set_index('source', append = True)
+meanmarkchange= difference4.groupby(level = ['monthend_date', 'source'])['spread'].mean().unstack(level = -1)
+StdDevmarkchange = difference4.groupby(level = ['monthend_date', 'source'])['spread'].std().unstack(level = -1)
 
 #failed code
 #df1 = df
@@ -103,3 +210,22 @@ def readtenorSpread1(index="IG", tenor="5yr"):
 # df = df.set_index('date')
 # df = df.diff()
 # return df.dropna()
+
+
+# def interpolateRates(df, date, duration):
+#     set1 = False
+#     set2 = False
+#     #import pdb; pdb.set_trace()
+#     for tenor in sorted(df[date]['tenor']):
+#         if duration < tenor and set1 == False:
+#             lowertenor = tenor
+#             lowerrate = df[date][df[date]['tenor'].isin([tenor])]['PX_LAST']
+#             set1 = True
+#         elif duration < tenor and set2 == False and set1 == True:
+#             uppertenor = tenor
+#             upperrate = df[date][df[date]['tenor'].isin([tenor])]['PX_LAST']
+#             set2 = True
+#             rate = (upperrate[0]-lowerrate[0])/(uppertenor-lowertenor)*(duration - lowertenor)+lowerrate[0]
+#         else:
+#             rate = df[date][df[date]['tenor'].isin([tenor])]['PX_LAST'][0]
+#     return rate
\ No newline at end of file
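
Notes on the patch. The new interpolate_rates helper calls interp1d, but no scipy import is added anywhere in the file, so the script would fail at runtime (presumably "from scipy.interpolate import interp1d" is missing). The bare "return df1" after the final set_index sits at module level, which is a SyntaxError outside a function; it reads like a leftover from refactoring readtenorSpread1 into monthlySpreadDiff. Also, monthlySpreadDiff's default period="1yr" would be handed to .diff(periods=...), which expects an integer and looks like it would raise; the one call site passes interval = 1, so only the unused default is affected, and the tenor argument is ignored in favor of the hardcoded "5yr". A minimal, self-contained sketch of the interpolation step, with illustrative tenors and rates rather than real curve data:

    import numpy as np
    from scipy.interpolate import interp1d  # the import the patch appears to be missing

    # Swap-curve points keyed by tenor in years, mirroring the `mapping` dict
    tenors = np.array([0.5, 2, 3, 4, 5, 7, 10])
    rates = np.array([1.3, 1.5, 1.7, 1.9, 2.0, 2.2, 2.4])  # PX_LAST, made up

    # interp1d builds a piecewise-linear interpolant; fill_value='extrapolate'
    # keeps it defined for durations outside [0.5, 10]
    curve = interp1d(tenors, rates, fill_value='extrapolate')
    print(curve(3.4))  # ~1.78: the yield used for a bond with moddur 3.4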
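The aux helper linearizes the rate sensitivity: assuming delta_ir is the PV change for a +40bp parallel shock (per the "#assume shock to be 40" comment), scaling it by the observed swap-yield change in basis points strips the rate move out of the PV before the spread is computed. A toy calculation with made-up numbers:

    # Hypothetical inputs: pv and delta_ir as the model would report them,
    # yielddiff as the month-over-month interpolated swap-yield change in bp
    pv = 0.95          # model PV, fraction of par
    delta_ir = -0.012  # PV change for a +40bp shock
    yielddiff = 25.0   # observed yield change, bp

    # Scale the 40bp sensitivity to the actual move: -0.012 * 25 / 40 = -0.0075
    adjpv = pv + delta_ir * yielddiff / 40
    print(adjpv)  # 0.9425; the residual vs. the external mark is then
                  # closer to a pure credit-spread change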
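The BDay column leans on pd.merge_asof's default direction='backward': each month-end is paired with the latest earlier-or-equal date that has curve data (allow_exact_matches=True is the default anyway), and the result is read from 'date_y' evidently because both frames carry a 'date' column and pandas suffixes the collision. Both inputs must be sorted on their keys, which the preceding sort calls ensure. A toy illustration:

    import pandas as pd

    # Toy frames: month-end mark dates vs. the dates with curve data
    marks = pd.DataFrame({'monthend_date': pd.to_datetime(['2016-01-31', '2016-02-29'])})
    rates = pd.DataFrame({'date': pd.to_datetime(['2016-01-29', '2016-02-26'])})

    # direction='backward' (the default): closest earlier-or-equal match
    out = pd.merge_asof(marks, rates, left_on='monthend_date', right_on='date')
    print(out)  # 2016-01-31 -> 2016-01-29, 2016-02-29 -> 2016-02-26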
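All three regression methods call sm.OLS(y, x) on a raw column, so statsmodels fits a through-the-origin model y = b*x: no intercept is estimated, and the reported R-squared is the uncentered version. If an intercept is intended, the regressor needs sm.add_constant. A sketch on synthetic data showing the difference:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)
    hy = rng.normal(0, 25, 36)               # monthly HY spread changes, bp
    marks = 0.4 * hy + rng.normal(0, 5, 36)  # mark spread changes tracking HY

    slope_only = sm.OLS(marks, hy).fit()                   # y = b*x, as in the script
    with_const = sm.OLS(marks, sm.add_constant(hy)).fit()  # y = a + b*x
    print(slope_only.params, with_const.params)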
