import datetime

import numpy as np
import pandas as pd
import statsmodels.api as sm
from pandas.tseries.offsets import MonthEnd
from scipy.interpolate import interp1d  # needed by interpolate_rates below
from statsmodels.formula.api import ols

import bbg_helpers
from db import dbengine

#from PyQt4 import QtGui  # has a conflict with matplotlib
#import matplotlib
#matplotlib.use("Qt4Agg")  # to prevent conflict with PyQt4


def monthlySpreadDiff(index="IG", tenor="5yr", period=12):
    """Change in index close spreads over `period` months (12 = year-on-year)."""
    date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
    sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \
              "WHERE index = %s AND date >= %s AND tenor = %s"
    df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'],
                           index_col=['date', 'series'],
                           params=(index.upper(), date, tenor))
    # One column per series, month-end sampling, difference over `period` months,
    # then keep the highest (on-the-run) series available on each date.
    df1 = df.unstack(level=1).resample('M').last().diff(periods=period)
    df1 = df1.stack(level=1).groupby(level='date').last()
    return df1


def nearestDate(base, dates):
    """Return the element of `dates` closest in absolute time to `base`."""
    return min(dates, key=lambda d: abs(base.timestamp() - d.timestamp()))


def interpolate_rates(s, hist_data):
    """Linearly interpolate the swap curve on date s.BDay at tenor s.moddur."""
    date, rate = s.BDay, s.moddur
    v = hist_data.loc[date].values  # columns: [PX_LAST, tenor]
    return interp1d(v[:, 1], v[:, 0], fill_value='extrapolate')(rate)


def aux(df):
    # delta_ir is the price change for a 40bp rate shock, so scale by yielddiff / 40.
    df['adjpv'] = df.pv + df.delta_ir * df.yielddiff / 40
    return df
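# --------------------------------------------------------------------------
# Illustrative sanity checks for the helpers above (a minimal sketch on
# synthetic dates and rates, not part of the production flow).
# --------------------------------------------------------------------------
_demo_dates = pd.to_datetime(["2016-11-28", "2016-12-30", "2017-01-31"])
# nearestDate maps a month-end to the closest available model date:
assert nearestDate(pd.Timestamp("2016-12-31"), _demo_dates) == pd.Timestamp("2016-12-30")

# interpolate_rates reads the curve rows for one date (same layout as
# hist_data1 below: a date index with PX_LAST and tenor columns) and
# interpolates at the bond's modified duration; 3.5y falls halfway between
# the 2y and 5y points, so the expected rate is 1.25.
_demo_curve = pd.DataFrame({"PX_LAST": [1.0, 1.5, 2.0], "tenor": [2.0, 5.0, 10.0]},
                           index=pd.to_datetime(["2016-12-30"] * 3))
_demo_row = pd.Series({"BDay": pd.Timestamp("2016-12-30"), "moddur": 3.5})
assert abs(float(interpolate_rates(_demo_row, _demo_curve)) - 1.25) < 1e-9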
#df = all external marks, sorted by date
df = (pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
                        parse_dates=['date'])
      .sort_values('date'))

BBG_IP = ['192.168.9.65']
#BBG_IP = ['192.168.9.61', '192.168.9.65', '192.168.0.10', '192.168.0.12']
bbgstartdate = datetime.datetime(2013, 1, 1)  # pd.datetime was removed from pandas
hist_securities = ["US0006M Index", "USSWAP2 Curncy", "USSWAP3 Curncy",
                   "USSWAP4 Curncy", "USSWAP5 Curncy", "USSWAP7 Curncy",
                   "USSWAP10 Curncy"]
hist_fields = ['PX_LAST']
with bbg_helpers.init_bbg_session(BBG_IP) as session:
    hist_data = bbg_helpers.retrieve_data(session, hist_securities, hist_fields,
                                          start_date=bbgstartdate)
hist_data1 = pd.concat(hist_data, ignore_index=False,
                       names=['Rate Index', 'numIndex']).reset_index()
# Map each Bloomberg ticker to its tenor in years.
mapping = {"US0006M Index": .5, "USSWAP2 Curncy": 2, "USSWAP3 Curncy": 3,
           "USSWAP4 Curncy": 4, "USSWAP5 Curncy": 5, "USSWAP7 Curncy": 7,
           "USSWAP10 Curncy": 10}
mapping = pd.Series(mapping, name='tenor')
mapping.index.name = 'Rate Index'
mapping = mapping.reset_index()
hist_data1 = hist_data1.merge(mapping, on='Rate Index')
hist_data1 = hist_data1[['date', 'PX_LAST', 'tenor']].set_index('date')
#rate = interpolate_rates(hist_data1, '2016-12-20', 3.4)

#df1 = external marks with dates normalized and duplicate marks averaged
df1 = df.copy()
df1['date'] = pd.to_datetime(df1['date'], errors='coerce')
df1 = df1.sort_values('date')
df1 = df1.groupby(['date', 'identifier', 'source'])['mark'].mean()
df1 = df1.reset_index().set_index('date')

#df2 = mapping of each month-end to the closest available model date
mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",
                            dbengine('rmbs_model'),
                            parse_dates=['model_date']).sort_values('model_date')
df2 = df.set_index('date')
df2 = df2.groupby(level='date')['mark'].mean()  # one row per mark date
for dt in df2.index:
    df2[dt] = nearestDate(dt, mapping['model_date'])  # overwrite value with nearest model date
df2 = pd.DataFrame(df2)
df2.columns = ['model_date']

#merge the mapped model dates back onto the external marks table
df1 = df1.join(df2)

#df3 = all the model runs on the available model dates
sql_str = "select timestamp as date, pv, cusip, moddur, delta_ir from priced " \
          "where date(timestamp) = %s and model_version = 1"
#Note: .to_pydatetime() is needed so the parameter is a datetime MySQL can
#bind; it is not required for Postgres.
df3 = pd.concat(  # DataFrame.append was removed in pandas 2.0
    [pd.read_sql_query(sql_str, dbengine('rmbs_model'), parse_dates=['date'],
                       params=(date.to_pydatetime(),))
     for date in df2['model_date']],
    ignore_index=True)
df3 = df3.rename(columns={"cusip": "identifier"})
df3 = df3.sort_values('date')
df3['date'] = df3['date'].dt.date
df3 = df3.set_index(['date', 'identifier'])

#now merge the external month-end marks with the available model dates
df1 = df1.reset_index()
df1 = df1.rename(columns={"model_date": "date", "date": "monthend_date"})
df1 = df1.set_index(['date', 'identifier'])
df1 = df1.merge(df3, left_index=True, right_index=True)
df1 = df1.reset_index().sort_values('monthend_date')
#match each month-end to the latest date on which rates are available
availratesdate = pd.DataFrame(sorted(pd.unique(hist_data1.index)), columns=['date'])
df1['BDay'] = pd.merge_asof(df1, availratesdate, left_on='monthend_date',
                            right_on='date', allow_exact_matches=True)['date_y']
df1temp = df1.groupby(['identifier', 'BDay', 'moddur']).mean(numeric_only=True).reset_index()
df1temp['yield'] = df1temp.apply(interpolate_rates, axis=1, args=(hist_data1,))
df1temp['yielddiff'] = df1temp.groupby('identifier')['yield'].diff() * 100  # in bp
df1 = pd.merge(df1, df1temp[['identifier', 'BDay', 'moddur', 'yielddiff']],
               on=['identifier', 'BDay', 'moddur'])
#done in a roundabout way just as notes: no need to groupby unless we are
#doing a diff along the way
df1 = df1.groupby('identifier').apply(aux)
df1.adjpv = df1.adjpv.where(df1.adjpv.notnull(), df1.pv)  # fall back to the raw pv
df1['spread'] = (df1['adjpv'] * 100 - df1['mark']) / df1['moddur'] * 100
df1 = df1.set_index(['monthend_date', 'identifier', 'source'])
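# --------------------------------------------------------------------------
# Worked example of the shock adjustment and spread formula above, on assumed
# numbers (not from the data): delta_ir is the price change per 40bp shock,
# so a +20bp yield move scales it by 20/40.
# --------------------------------------------------------------------------
_toy = aux(pd.DataFrame({"pv": [0.98], "delta_ir": [0.12], "yielddiff": [20.0],
                         "mark": [101.5], "moddur": [4.0]}))
# adjpv  = 0.98 + 0.12 * 20 / 40           = 1.04
# spread = (1.04 * 100 - 101.5) / 4 * 100  = 62.5bp
_toy_spread = (_toy.adjpv * 100 - _toy['mark']) / _toy.moddur * 100
assert abs(_toy.adjpv[0] - 1.04) < 1e-9 and abs(_toy_spread[0] - 62.5) < 1e-6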
#difference = difference in spread

#Filters
SpreadFilter = 1000
SpreadDiffFilter = 250
DurationFilter = 1.5
begindate = '2014-01-01'
enddate = '2017-01-01'
interval = 1  # number of months to difference over
HYDifference = monthlySpreadDiff(index='HY', period=interval)
#NOTE: the filter expression was cut off in the source; the comparison against
#SpreadFilter below is the most plausible reading given the thresholds above.
difference = df1[abs(df1['spread']) < SpreadFilter]
# (...lower marks for most/all bonds?)

#difference.xs('PRICESERVE', level='source', drop_level=False)
#difference2[difference2['source']=='BVAL'].to_clipboard()
#difference1 (the filtered spread changes joined with the HY index series) is
#built in a section missing from the source above this point.
difference4 = difference1.rename(columns={('Index', 'HY'): "HY Spread"})
difference4['HY vs mark'] = difference4['spread'] - difference4['HY Spread']
difference4.index = difference4.index.rename('monthend_date')
difference4 = difference4.set_index('source', append=True)
#mean and dispersion of the mark change per month-end and pricing source
meanmarkchange = (difference4.groupby(level=['monthend_date', 'source'])['spread']
                  .mean().unstack(level=-1))
StdDevmarkchange = (difference4.groupby(level=['monthend_date', 'source'])['spread']
                    .std().unstack(level=-1))

#failed code
#df1 = df
#df2 = df
#df1 = df1.set_index(['date','identifier','source'])
#df2['prevdate'] = df2['date'] - MonthEnd(n=1)
#df2 = df2.rename(columns={'date': 'nextdate', 'prevdate': 'date'})
#df2 = df2.set_index(['date','identifier','source'])
#import pdb; pdb.set_trace()
#df3 = df1.mark - df2.mark

#now let's get HY spreads
# index = 'IG'
# def readtenorSpreadold(index):
#     serenitasdb = dbengine('serenitasdb')
#     maxindexnumber = pd.read_sql_query("select max(series) from index_version where index = %s",
#                                        serenitasdb, params=(index,))['max'][0]
#     d = {}
#     seriesstart = {}
#     df = pd.DataFrame()
#     for i in range(maxindexnumber - 8, maxindexnumber + 1):
#         #import pdb; pdb.set_trace()
#         d[i] = pd.read_sql_query("select date, closeprice, closespread from index_quotes "
#                                  "where index = %s and series = %s and tenor = '5yr' "
#                                  "order by date desc",
#                                  serenitasdb, parse_dates=['date'], params=(index, i))
#         seriesstart[i] = d[i].min(axis=0).date
#     seriesstart[maxindexnumber + 1] = pd.Timestamp(2030, 1, 1)
#     for i in range(maxindexnumber - 8, maxindexnumber + 1):
#         # truncated in the source; the intent is to keep each series between
#         # its own start and the next series' start:
#         df = df.append(d[i][(d[i].date > seriesstart[i]) & (d[i].date < seriesstart[i + 1])])
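# --------------------------------------------------------------------------
# Sketch of the series-stitching idea in readtenorSpreadold, on hypothetical
# data: each index series is kept only from its own first quote date up to
# (but excluding) the next series' first quote date, approximating an
# on-the-run history. The strict '>' above also drops each series' own first
# quote; a far-future sentinel start date closes out the newest series.
# --------------------------------------------------------------------------
_s26 = pd.DataFrame({"date": pd.to_datetime(["2016-03-21", "2016-09-19"]), "series": 26})
_s27 = pd.DataFrame({"date": pd.to_datetime(["2016-09-20", "2017-03-20"]), "series": 27})
_start = {26: _s26.date.min(), 27: _s27.date.min(), 28: pd.Timestamp(2030, 1, 1)}
_on_the_run = pd.concat([
    _s26[(_s26.date > _start[26]) & (_s26.date < _start[27])],  # keeps 2016-09-19
    _s27[(_s27.date > _start[27]) & (_s27.date < _start[28])],  # keeps 2017-03-20
])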