"""Compare month-end external marks against model PVs and index spreads.

Pipeline (top to bottom):
  1. Load external marks from dawndb and de-duplicate per (date, identifier, source).
  2. Pull swap-rate history from Bloomberg and interpolate a yield at each
     bond's modified duration.
  3. Map each month-end mark date to the nearest available model run date and
     join the model PV / duration / IR-delta for each cusip.
  4. Duration-adjust the model PV for the month's rate move, convert the
     model-vs-mark difference into a spread, and compare against HY index
     spread changes.

NOTE(review): this module was recovered from a whitespace-mangled source; the
section marked "best-effort reconstruction" below was truncated in the
original and must be confirmed against version control.
"""
import pandas as pd
from db import dbengine
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
from pandas.tseries.offsets import MonthEnd
from scipy.interpolate import interp1d
import datetime
from pandas.tseries.offsets import *
import bbg_helpers
#from PyQt4 import QtGui #has conflict with matplotlib
#import matplotlib
#matplotlib.use("Q4Agg") #to prevent conflict with PyQT4


def monthlySpreadDiff(index="IG", tenor="5yr"):
    """Month-over-month change in the on-the-run index close spread.

    Parameters
    ----------
    index : str
        Index family, e.g. "IG" or "HY" (upper-cased before querying).
    tenor : str
        Quote tenor, e.g. "5yr".

    Returns
    -------
    pd.Series
        Month-end diff of close spreads, indexed by date; on each date the
        highest series number (the on-the-run contract) is taken.
    """
    # Look back four years from today.
    date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
    sql_str = ("SELECT date, series, version, closespread AS spread FROM index_quotes "
               "WHERE index=%s and date>=%s and tenor = %s")
    # BUG FIX: `tenor` was previously ignored -- the query params hard-coded
    # "5yr". Pass the parameter through (default preserves old behavior).
    df = pd.read_sql_query(sql_str, dbengine('serenitasdb'),
                           parse_dates=['date'],
                           index_col=['date', 'series'],
                           params=(index.upper(), date, tenor))
    # Keep the last row (latest version) per (date, series).
    df1 = df.groupby(level=['date', 'series']).last()
    # Month-end resample per series, then month-over-month change.
    df1 = df1.unstack(level=1).resample('1m').last().diff()
    # Highest series available on each date.
    df1 = df1.stack(level=1).groupby(level='date').last()['spread']
    return df1


def nearestDate(base, dates):
    """Return the element of `dates` closest in time to `base`."""
    return min(dates, key=lambda d: abs(base.timestamp() - d.timestamp()))


def interpolate_rates(s, hist_data):
    """Interpolate a swap rate for row `s` at its modified duration.

    `s` must expose `.BDay` (rate-history date) and `.moddur`; `hist_data`
    is indexed by date with columns [PX_LAST, tenor]. Rates are linearly
    interpolated across tenor, extrapolating beyond the curve ends.
    """
    date, rate = s.BDay, s.moddur
    v = hist_data.loc[date].values
    # v[:, 0] = PX_LAST, v[:, 1] = tenor
    return interp1d(v[:, 1], v[:, 0], fill_value='extrapolate')(rate)


def aux(df):
    """Duration-adjust model PV for the month's rate move (assumed 40bp shock)."""
    df['adjpv'] = df.pv + df.delta_ir * df.yielddiff / 40
    return df


# --- External marks -----------------------------------------------------
df = (pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
                        parse_dates=['date'])
      .sort_values('date'))

# --- Bloomberg swap-rate history ----------------------------------------
BBG_IP = ['192.168.9.65']
#BBG_IP = ['192.168.9.61', '192.168.9.65', '192.168.0.10', '192.168.0.12']
# FIX: pd.datetime is a deprecated (now removed) alias; use the datetime module.
bbgstartdate = datetime.datetime(2013, 1, 1)
hist_securities = ["US0006M Index", "USSWAP2 Curncy", "USSWAP3 Curncy",
                   "USSWAP4 Curncy", "USSWAP5 Curncy", "USSWAP7 Curncy",
                   "USSWAP10 Curncy"]
hist_fields = ['PX_LAST']
with bbg_helpers.init_bbg_session(BBG_IP) as session:
    hist_data = bbg_helpers.retrieve_data(session, hist_securities, hist_fields,
                                          start_date=bbgstartdate)
hist_data1 = pd.concat(hist_data, ignore_index=False,
                       names=['Rate Index', 'numIndex']).reset_index()
# Map each Bloomberg ticker to its tenor in years.
mapping = {"US0006M Index": .5, "USSWAP10 Curncy": 10, "USSWAP2 Curncy": 2,
           "USSWAP3 Curncy": 3, "USSWAP4 Curncy": 4, "USSWAP5 Curncy": 5,
           "USSWAP7 Curncy": 7}
mapping = pd.Series(mapping, name='tenor')
mapping.index.name = 'Rate Index'
mapping = mapping.reset_index()
hist_data1 = hist_data1.merge(mapping, on='Rate Index')
hist_data1 = hist_data1[['date', 'PX_LAST', 'tenor']].set_index('date')
#rate = interpolateRates(hist_data1, '2016-12-20', 3.4)

# --- df1: reformat dates, de-duplicate external marks -------------------
# NOTE(review): df1 aliases df, so the to_datetime assignment below also
# mutates df. Kept as-is because df is re-used further down with the
# coerced dates -- confirm before changing to df.copy().
df1 = df
df1['date'] = pd.to_datetime(df['date'], errors='coerce')
df1 = df1.sort_values('date')
df1 = df1.set_index(['date', 'identifier', 'source'])
# Average duplicate marks for the same (date, identifier, source).
df1 = df1.groupby(level=['date', 'identifier', 'source'])['mark'].mean()
df1 = df1.reset_index()
df1 = df1.set_index(['date'])

# --- df2: map each month-end to the closest available model date --------
mapping = pd.read_sql_query(
    "select distinct date(timestamp) as model_date from priced",
    dbengine('rmbs_model'), parse_dates=['model_date']).sort_values('model_date')
df2 = df.set_index(['date'])
df2 = df2.groupby(level=['date'])['mark'].mean()
for dt in df2.index:
    df2[dt] = nearestDate(dt, mapping['model_date'])
df2 = pd.DataFrame(df2)
df2.columns = ['model_date']
# merge the mapped model dates back to the external marks table
df1 = df1.join(df2)

# --- df3: all the model runs on the available model dates ---------------
sql_str = ("select timestamp as date, pv, cusip, moddur, delta_ir from priced "
           "where date(timestamp) = %s and model_version = 1")
# Note: .to_pydatetime() is required for the date to be passable to MySQL;
# not required for Postgres.
# FIX: DataFrame.append in a loop is deprecated and quadratic; collect the
# per-date frames and concatenate once.
df3 = pd.concat(
    [pd.read_sql_query(sql_str, dbengine('rmbs_model'), parse_dates=['date'],
                       params=(date.to_pydatetime(),))
     for date in df2['model_date']],
    ignore_index=True)
df3 = df3.rename(columns={"cusip": "identifier"})
df3 = df3.sort_values('date')
df3['date'] = df3['date'].dt.date
df3 = df3.set_index(['date', 'identifier'])

# --- Merge month-end marks with the model results -----------------------
df1 = df1.reset_index()
df1 = df1.rename(columns={"model_date": "date", "date": "monthend_date"})
df1 = df1.set_index(['date', 'identifier'])
df1 = df1.merge(df3, left_index=True, right_index=True)
df1 = df1.reset_index().sort_values('monthend_date')

# For each month-end, find the latest date with available rate history.
availratesdate = pd.DataFrame(sorted(pd.unique(hist_data1.index)), columns=['date'])
df1['BDay'] = pd.merge_asof(df1, availratesdate, left_on='monthend_date',
                            right_on='date', allow_exact_matches=True)['date_y']
df1temp = df1.groupby(['identifier', 'BDay', 'moddur']).mean().reset_index()
df1temp['yield'] = df1temp.apply(interpolate_rates, 1, args=(hist_data1,))
df1temp['yielddiff'] = df1temp.groupby(['identifier'])['yield'].diff() * 100
df1 = pd.merge(df1, df1temp[['identifier', 'BDay', 'moddur', 'yielddiff']],
               on=['identifier', 'BDay', 'moddur'])

# doing in roundabout way just as notes: no need to groupby unless we are
# doing a diff along the way
df1 = df1.groupby('identifier').apply(aux)
# Where no rate move was computable, fall back to the raw model PV.
df1.adjpv = df1.adjpv.where(df1.adjpv.notnull(), df1.pv)
# Spread (bp) implied by the model-price-vs-mark difference.
df1['spread'] = (df1['adjpv'] * 100 - df1['mark']) / df1['moddur'] * 100
df1 = df1.set_index(['monthend_date', 'identifier', 'source'])
#difference = difference in spread

# --- Filters ------------------------------------------------------------
SpreadFilter = 1000
SpreadDiffFilter = 250
DurationFilter = 1.5
begindate = '2014-01-01'
enddate = '2017-01-01'
interval = 1

HYDifference = monthlySpreadDiff(index='HY')

# NOTE(review): the original source was truncated between here and
# `difference4` below -- the construction of `difference`/`difference1` (and
# possibly difference2/difference3) was lost. The lines below are a
# best-effort reconstruction from the surviving fragment
# "difference = df1[abs(df1['spread'])" and the downstream use of an 'HY'
# column; recover the true logic from version control before relying on it.
difference = df1[abs(df1['spread']) < SpreadFilter]
difference1 = (difference.reset_index(['identifier', 'source'])
               .join(HYDifference.rename('HY')))
# (lower marks for most/all bonds?)
#difference.xs('PRICESERVE', level = 'source', drop_level=False)
#difference2[difference2['source']=='BVAL'].to_clipboard()
difference4 = difference1.rename(columns={'HY': "HY Spread"})
difference4['HY vs mark'] = difference4['spread'] - difference4['HY Spread']
difference4.index = difference4.index.rename('monthend_date')
difference4 = difference4.set_index('source', append=True)
meanmarkchange = (difference4.groupby(level=['monthend_date', 'source'])['spread']
                  .mean().unstack(level=-1))
StdDevmarkchange = (difference4.groupby(level=['monthend_date', 'source'])['spread']
                    .std().unstack(level=-1))

#failed code
#df1 = df
#df2 = df
#df1 = df1.set_index(['date','identifier','source'])
#df2['prevdate'] = df2['date']- MonthEnd(n=1)
#df2 = df2.rename(columns={'date': 'nextdate', 'prevdate':'date'})
#df2 = df2.set_index(['date','identifier','source'])
#import pdb; pdb.set_trace()
#df3 = df1.mark - df2.mark

#now let's get HY spreads
# index = 'IG'
# def readtenorSpreadold(index):
#     serenitasdb = dbengine('serenitasdb')
#     maxindexnumber = pd.read_sql_query("select max(series) from index_version where index= %s",serenitasdb, params=(index,))['max'][0]
#     d={}
#     seriesstart = {}
#     df = pd.DataFrame()
#     for i in range(maxindexnumber-8, maxindexnumber+1):
#         #import pdb; pdb.set_trace()
#         d[i]= pd.read_sql_query("select date, closeprice, closespread from index_quotes where index= %s and series = %s and tenor = '5yr' order by date desc",serenitasdb, parse_dates = ['date'], params=(index,i))
#         seriesstart[i] = d[i].min(axis=0).date
#     seriesstart[maxindexnumber+1]= pd.Timestamp(2030,1,1)
#     for i in range(maxindexnumber-8, maxindexnumber+1):
#         df = df.append(d[i][(d[i].date>seriesstart[i]) & (d[i].date
# NOTE(review): the commented-out function above is truncated in the source.