"""Credit-index curve analytics.

Spread-curve shape, carry (theta/duration), curve-trade returns, forward
spreads and scenario PNL for CDS indices ('IG', 'HY', 'ITRX').
"""
from analytics.index_data import get_index_quotes, index_returns
from db import dbengine
from analytics import Index, Portfolio
from analytics.utils import roll_date
from dateutil.relativedelta import relativedelta
from analytics.basket_index import MarkitBasketIndex
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from scipy.interpolate import interp1d
from itertools import chain
from copy import deepcopy
import pandas as pd
import math
import statsmodels.formula.api as smf
import numpy as np
import matplotlib.pyplot as plt

serenitasdb = dbengine('serenitasdb')
dawndb = dbengine('dawndb')


def on_the_run(index):
    """Return the on-the-run (highest) series number for `index`."""
    r = serenitasdb.execute(
        "SELECT max(series) FROM index_version WHERE index=%s", (index,))
    series, = r.fetchone()
    return series


def curve_spread_diff(index='IG', rolling=6, years=3, percentage=False,
                      percentage_base='5yr'):
    """Historical curve slopes for `index` over the last `years` years.

    Returns a DataFrame of daily on-the-run spread differences between
    adjacent tenors ('3-5', '5-7', '7-10', '5-10').  When ``percentage`` is
    True, returns instead each tenor's spread as a ratio to
    ``percentage_base`` (columns stay the tenor labels).
    """
    otr = on_the_run(index)
    ## look at spreads
    df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
                          tenor=['3yr', '5yr', '7yr', '10yr'], years=years)
    # nth(-1) keeps the most recent series quoted on each (date, tenor).
    spreads = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1)
    spreads_diff = spreads.diff(axis=1)
    del spreads_diff['3yr']
    spreads_diff.columns = ['3-5', '5-7', '7-10']
    spreads_diff['5-10'] = spreads_diff['5-7'] + spreads_diff['7-10']
    if percentage is True:
        spreads_diff = spreads.apply(lambda df: df / df[percentage_base], axis=1)
    return spreads_diff


def spreads_diff_table(spreads_diff):
    """Summarize curve-slope history: min/max/mean plus the latest value and
    latest z-score per column.  Also plots the z-scored history."""
    def current(s):
        return s.iat[-1]

    def zscore(s):
        return (s.iat[-1] - s.mean()) / s.std()

    df = spreads_diff.agg(['min', 'max', 'mean', current, zscore])
    ((spreads_diff - spreads_diff.mean()) / spreads_diff.std()).plot()
    return df


def theta_matrix_by_series(index='IG', rolling=6):
    """Latest theta-per-unit-duration by (series, tenor) for `index`."""
    otr = on_the_run(index)
    df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
                          tenor=['3yr', '5yr', '7yr', '10yr'])
    df['theta_per_dur'] = df.theta2 / df.duration2
    theta_matrix = df.groupby(level=['date', 'tenor', 'series']).nth(-1)['theta_per_dur']
    # Restrict to the most recent date, then pivot tenors to columns.
    theta_matrix = theta_matrix.loc[theta_matrix.index[-1][0]].unstack(0)
    return theta_matrix[['3yr', '5yr', '7yr', '10yr']]


def ratio_within_series(index='IG', rolling=6, param='duration'):
    """Latest per-date quotes with an extra column block of `param` ratios
    to the 5yr tenor within each series."""
    otr = on_the_run(index)
    df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
                          tenor=['3yr', '5yr', '7yr', '10yr']).unstack()
    ratio = (df[param].
             apply(lambda s: s / df[param]['5yr'].values, raw=True))
    ratio.columns = pd.MultiIndex.from_product([[param + '_ratio_to_5yr'],
                                                ratio.columns])
    df = df.join(ratio).groupby(['date']).tail(1)
    df = df.reset_index(level=['index', 'version'], drop=True)
    return df


def on_the_run_theta(index='IG', rolling=6):
    """Plot on-the-run theta per unit of duration, by tenor, over time."""
    otr = on_the_run(index)
    df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
                          tenor=['3yr', '5yr', '7yr', '10yr'])
    df['theta_per_dur'] = df.theta2 / df.duration2
    theta_matrix = df.groupby(level=['date', 'tenor']).nth(-1)['theta_per_dur']
    theta_matrix.unstack(-1).plot()


def curve_returns(index='IG', rolling=6):
    """Sharpe ratios and worst-drawdown stats for duration-weighted curve
    trades (5-10, 7-10, 3-5-10 fly, 3-5, 3-7) on on-the-run returns."""
    ## look at returns
    otr = on_the_run(index)
    df = index_returns(index=index, series=list(range(otr - rolling, otr + 1)),
                       tenor=['3yr', '5yr', '7yr', '10yr'])
    ## on-the-run returns
    df = df.reset_index().set_index(['date', 'series', 'tenor'])
    returns = df.price_return.dropna().unstack(-1).groupby(level='date').nth(-1)
    # Hard-coded hedge ratios are approximate duration ratios between tenors.
    strategies_return = pd.DataFrame(
        {'5-10': 1.78 * returns['5yr'] - returns['10yr'],
         '7-10': 1.33 * returns['7yr'] - returns['10yr'],
         '3-5-10': -2 * returns['3yr'] + 3 * returns['5yr'] - returns['10yr'],
         '3-5': returns['5yr'] - 1.56 * returns['3yr'],
         '3-7': returns['7yr'] - 2.07 * returns['3yr']})
    strategies_return_monthly = (strategies_return.
                                 groupby(pd.Grouper(freq='M')).
                                 agg(lambda df: (1 + df).prod() - 1))

    def sharpe(df, period="daily"):
        # Annualize: 252 trading days, or 12 months.
        if period == "daily":
            return df.mean() / df.std() * math.sqrt(252)
        else:
            return df.mean() / df.std() * math.sqrt(12)

    results = strategies_return.agg([sharpe, lambda df: df.nsmallest(10).mean()])
    sharpe_monthly = strategies_return_monthly.agg(sharpe, period="monthly")
    sharpe_monthly.name = 'Monthly Sharpe'
    results.index = ['Sharpe', 'Mean Worst 10 Days DrawDown']
    # DataFrame.append was removed in pandas 2.0; concat a one-row frame.
    return pd.concat([results, sharpe_monthly.to_frame().T])


def cross_series_curve(index='IG', rolling=6):
    """Plot (and return) monthly returns of a cross-series 5yr trade:
    current series vs 1.6x the series four rolls older."""
    otr = on_the_run(index)
    df = index_returns(index=index, series=list(range(otr - rolling, otr + 1)),
                       tenor=['3yr', '5yr', '7yr', '10yr'])
    ## look cross series - 3y to 5y
    df = df.reset_index().set_index(['date', 'index', 'tenor', 'series'])
    returns1 = df.xs(['5yr', index], level=['tenor', 'index']).price_return.unstack(-1)
    price_diff = pd.DataFrame()
    for ind in list(range(otr - 2, otr + 1)):
        price_diff[ind] = returns1[ind] - 1.6 * returns1[ind - 4]
    price_diff = price_diff.stack().groupby(level='date').nth(-1)
    monthly_returns_cross_series = (price_diff.
                                    groupby(pd.Grouper(freq='M')).
                                    agg(lambda df: (1 + df).prod() - 1))
    plt.plot(monthly_returns_cross_series)
    return monthly_returns_cross_series


def forward_loss(index='IG'):
    """Approximate forward loss rate per (date, index, maturity) from
    changes in expected loss (spread*duration/100) over time."""
    start_date = (pd.Timestamp.now() - pd.DateOffset(years=3)).date()
    df = pd.read_sql_query("SELECT date, index, series, tenor, duration, closespread, "
                           "closespread*duration / 100 AS indexel "
                           "FROM index_quotes WHERE index=%s AND date >= %s "
                           "ORDER BY date DESC, series ASC, duration ASC",
                           serenitasdb, parse_dates=['date'],
                           params=[index, start_date])
    df1 = pd.read_sql_query("SELECT index, series, tenor, maturity FROM index_maturity",
                            serenitasdb, parse_dates=['maturity'])
    df = df.merge(df1, on=['index', 'series', 'tenor'])
    df = df.set_index(['date', 'index', 'maturity']).dropna()
    df = df.groupby(level=['date', 'index', 'maturity']).nth(-1)
    # annual change, to take out some noise
    df['fwd_loss_rate'] = df.indexel.diff(2) / df.duration.diff(2)
    # BUG FIX: the computed frame was previously discarded (missing return).
    return df


def curve_model(tenor_1='5yr', tenor_2='10yr'):
    """Fit a log-log OLS of the tenor_2-to-5yr spread ratio on the tenor_1
    spread, duration and both thetas.  Returns (log-space data, fit)."""
    # OLS model
    df = ratio_within_series(param='closespread')
    df = pd.concat([df.duration[tenor_1], df.duration[tenor_2],
                    df.closespread[tenor_1], df.closespread_ratio_to_5yr[tenor_2],
                    df.theta[tenor_1], df.theta[tenor_2]],
                   axis=1,
                   keys=['duration1', 'duration2', 'closespread', 'ratio',
                         'theta1', 'theta2'])
    # Log everything so the fitted relationship is multiplicative.
    df = np.log(df)
    ols_model = smf.ols('ratio ~ closespread + duration1 + theta1 + theta2',
                        data=df).fit()
    return df, ols_model


def curve_model_results(df, model):
    """Back out predicted spreads and 2-stdev bands from the log-log model,
    plot them, and attach a dr/dspread sensitivity column."""
    df = df.dropna()
    prstd_ols, df['down_2_stdev'], df['up_2_stdev'] = wls_prediction_std(model)
    # dr/dspread = exp(k) + spread_coeff * duration ^ dur_coeff * spread ^ (spread_coeff-1)
    cols = ['ratio', 'closespread', 'down_2_stdev', 'up_2_stdev']
    df[cols] = np.exp(df[cols])  # back out of log space
    df['predicted'] = np.exp(model.predict())
    # Ratio * 5yr spread = predicted long-tenor spread (and its bands).
    df[['predicted', 'down_2_stdev', 'up_2_stdev']] = \
        df[['predicted', 'down_2_stdev', 'up_2_stdev']].multiply(
            df['closespread'].values, axis=0)
    df[['predicted', 'down_2_stdev', 'up_2_stdev']].reset_index(
        level='series', drop=True).plot()
    # NOTE(review): with 'ratio ~ closespread + duration1 + theta1 + theta2'
    # the params order is [Intercept, closespread, duration1, theta1, theta2];
    # the exponents on duration1/closespread below look swapped relative to
    # the comment above -- confirm before relying on this sensitivity.
    # (.iloc preserves the original positional lookup, whose int-label
    # fallback is deprecated in pandas.)
    df['dr_dspread'] = (np.exp(model.params.iloc[0]) * model.params.iloc[2] *
                        df.duration1 ** model.params.iloc[1] *
                        df.closespread ** (model.params.iloc[2] - 1))
    return df


def spread_fin_crisis(index='IG'):
    """Plot 20 years of 5yr spread against the 3-5 and 5-10 slopes (twin
    y-axes), reaching back through the financial crisis (series 8+)."""
    otr = on_the_run(index)
    ## look at spreads
    df = get_index_quotes(index, list(range(8, otr + 1)),
                          tenor=['3yr', '5yr', '7yr', '10yr'], years=20)
    spreads = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1)
    spreads_diff = spreads.diff(axis=1)
    to_plot = pd.DataFrame()
    to_plot['spread'] = spreads['5yr']
    # assumes tenor columns are ordered 3yr,5yr,7yr,10yr, so after
    # diff(axis=1) column '5yr' is the 5yr-3yr slope -- TODO confirm
    to_plot['3 - 5 diff'] = spreads_diff['5yr']
    to_plot['5 - 10 diff'] = spreads_diff['7yr'] + spreads_diff['10yr']
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax2 = ax.twinx()  # Create another axes that shares the same x-axis as ax
    to_plot['spread'].plot(color='red', ax=ax)
    to_plot['5 - 10 diff'].plot(color='blue', ax=ax2)
    to_plot['3 - 5 diff'].plot(color='green', ax=ax2)
    plt.legend(bbox_to_anchor=(.5, -.1), ncol=2)
    plt.show()


def forward_spread(report_date, index='IG', series=None,
                   tenors=('3yr', '5yr', '7yr', '10yr')):
    """Reprice the tweaked basket at month-end business dates out to the
    longest maturity; returns a DataFrame of forward spreads per tenor."""
    if series is None:
        series = on_the_run(index=index)
    b_index = MarkitBasketIndex(index, series, list(tenors),
                                value_date=report_date)
    b_index.tweak()
    f_spread = []
    # pd.datetime was removed from pandas; Timestamp.today() is the
    # supported equivalent.
    date_range = pd.bdate_range(pd.Timestamp.today(), max(b_index.maturities),
                                freq='M')
    for d in date_range.date:
        b_index.value_date = d
        f_spread.append(b_index.spread())
    return pd.concat(f_spread, keys=date_range).unstack(-1)


def spot_forward(index='IG', series=None, tenors=('3yr', '5yr', '7yr', '10yr')):
    '''Calculates the 1-year forward spot rate'''
    if series is None:
        series = on_the_run(index)
    b_index = MarkitBasketIndex(index, series, list(tenors))
    b_index.tweak()
    spreads_current = b_index.spread()
    spreads_current.name = 'current'
    spreads_1yr = pd.Series([b_index.spread(m - relativedelta(years=1),
                                            b_index.coupon(m))
                             for m in b_index.maturities], index=list(tenors))
    spreads_1yr.name = '1yr'
    df = pd.concat([spreads_current, spreads_1yr], axis=1)
    maturity_1yr = roll_date(b_index.index_desc.issue_date[0], 1)
    # Anchor the curve at 0yr/1yr; 1% coupon for IG, 5% otherwise.
    df_0 = pd.DataFrame({'current': [0., b_index.spread(maturity_1yr,
                                                        0.01 if index == "IG" else 0.05)],
                         '1yr': [0., 0.]},
                        index=['0yr', '1yr'])
    df_0.index.name = 'tenor'
    # DataFrame.append was removed in pandas 2.0; concat is equivalent here.
    df = pd.concat([df_0, df])
    df['maturity'] = [b_index.value_date, maturity_1yr] + b_index.maturities
    return df.reset_index().set_index('maturity')


def curve_pos(value_date, index='IG'):
    '''
    value_date : :class:`datetime.date`
    index : string one of 'IG', 'HY' or 'ITRX'

    Returns a Portfolio of curve trades
    '''
    sql_string = "SELECT * FROM cds where trade_date < %s"
    df = pd.read_sql_query(sql_string, dawndb,
                           parse_dates=['trade_date', 'maturity'],
                           params=[value_date])
    df = df[df['folder'] == f'SER_{index}CURVE']
    # Protection sold is long risk; flip the sign for bought protection.
    df.notional = df.notional.where(df.protection == 'Seller', -df.notional)
    df = df.groupby(['security_id', 'maturity'])['notional'].sum()
    # Series.nonzero() was removed from pandas; boolean filtering on the
    # netted notionals is equivalent.
    df = df[df != 0].reset_index()
    sql_string = "SELECT * FROM index_maturity LEFT JOIN index_version USING (index, series)"
    lookup_table = pd.read_sql_query(sql_string, serenitasdb,
                                     parse_dates=['maturity'])
    df = df.merge(lookup_table, left_on=['security_id', 'maturity'],
                  right_on=['redindexcode', 'maturity'])
    indices = []
    sql_string = ("SELECT closespread FROM index_quotes where index = %s "
                  "and series = %s and tenor = %s and date = %s")
    for i, row in df[['index', 'tenor', 'series', 'notional']].iterrows():
        temp = Index.from_name(row['index'], row.series, row.tenor)
        temp.value_date = value_date.date()
        if row.notional > 0:
            temp.direction = 'Seller'
        temp.notional = abs(row.notional)
        spread_df = pd.read_sql_query(sql_string, serenitasdb,
                                      params=[row['index'], row.series,
                                              row.tenor, value_date.date()])
        temp.spread = spread_df.iloc[0][0]
        indices.append(temp)
    return Portfolio(indices)


def curve_shape(value_date, index='IG', percentile=.95, spread=None):
    '''Returns a function to linearly interpolate between the curve based on
    maturity (in years)'''
    # Renamed local (was `curve_shape`) so it no longer shadows this function.
    shape = curve_spread_diff(index, 10, 5, True)
    steepness = shape['10yr'] / shape['3yr']
    series = on_the_run(index)
    if spread is None:
        sql_string = ("SELECT closespread FROM index_quotes where index = %s "
                      "and series = %s and tenor = %s and date = %s")
        spread_df = pd.read_sql_query(sql_string, serenitasdb,
                                      params=[index, series, '5yr',
                                              value_date.date()])
        spread = spread_df.iloc[0][0]
    sql_string = "SELECT tenor, maturity FROM index_maturity where index = %s and series = %s"
    lookup_table = pd.read_sql_query(sql_string, serenitasdb,
                                     parse_dates=['maturity'],
                                     params=[index, series])
    # Pick the historical day whose steepness sits at the requested quantile,
    # then rescale that curve shape to today's 5yr spread.
    df = shape[steepness == steepness.quantile(percentile, 'nearest')]
    df = df * spread / df['5yr'].iloc[0]
    df = df.stack().rename('spread')
    df = df.reset_index().merge(lookup_table, on=['tenor'])
    df['year_frac'] = (df.maturity - pd.to_datetime(value_date)).dt.days / 365
    return interp1d(np.hstack([0, df.year_frac]), np.hstack([0, df.spread]))


def pos_pnl_abs(portf, value_date, index='IG', rolling=6, years=3):
    '''Runs PNL analysis on portf using historical on-the-run spread levels
    - off-the-runs spreads are duration linearly interpolated'''
    series = on_the_run(index)
    df = get_index_quotes(index, list(range(series - rolling, series + 1)),
                          tenor=['3yr', '5yr', '7yr', '10yr'], years=years)
    df = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1)
    sql_string = "SELECT tenor, maturity FROM index_maturity where index = %s and series = %s"
    lookup_table = pd.read_sql_query(sql_string, serenitasdb,
                                     parse_dates=['maturity'],
                                     params=[index, series])
    lookup_table['year_frac'] = (lookup_table.maturity -
                                 pd.to_datetime(value_date)).dt.days / 365
    # Work on a deep copy so the caller's portfolio is untouched.
    portf_copy = Portfolio([deepcopy(ind) for ind in portf.indices])
    portf_copy.reset_pv()  # was called twice in the original; once suffices
    r = []
    for date, row in df.iterrows():
        # Anchor the interpolation at year 0 with half the first column's
        # spread (.iloc keeps the original positional lookup).
        f = interp1d(np.hstack([0, lookup_table['year_frac']]),
                     np.hstack([row.iloc[0] / 2, row]))
        for ind in portf_copy.indices:
            ind.spread = f((pd.to_datetime(ind.end_date) - value_date).days / 365)
        r.append([date, f(5), portf_copy.pnl])
    df = pd.DataFrame.from_records(r, columns=['date', 'five_yr_spread', 'pnl'])
    return df.set_index('date')


def curve_scen_table(portf):
    '''Runs PNL scenario on portf by shocking different points on the curve
    - off-the-runs shocks are duration linearly interpolated'''
    value_date = portf.value_date
    # Work on a deep copy so the caller's portfolio is untouched.
    portf_copy = Portfolio([deepcopy(ind) for ind in portf.indices])
    portf_copy.reset_pv()
    index = portf_copy.indices[0].index_type
    series = on_the_run(index)
    sql_string = "SELECT tenor, maturity FROM index_maturity where index = %s and series = %s"
    lookup_table = pd.read_sql_query(sql_string, serenitasdb,
                                     parse_dates=['maturity'],
                                     params=[index, series])
    lookup_table['year_frac'] = (lookup_table.maturity -
                                 pd.to_datetime(value_date)).dt.days / 365
    # Keep three curve points (rows 0, 1, 3 of the maturity table).
    lookup_table = lookup_table.iloc[[0, 1, 3]]
    year_frac = lookup_table.year_frac.tolist()
    tenors = lookup_table.tenor.tolist()
    shock = 10  # shocks in bps
    r = []
    tenor_shock = pd.DataFrame(0, index=lookup_table['year_frac'],
                               columns=['shock'])
    for t_frac, t_ten in zip(year_frac, tenors):
        for w_frac, w_ten in zip(year_frac, tenors):
            # BUG FIX: rebuild the shock vector every iteration.  The
            # original zeroed loc[w_frac] after each scenario, which on the
            # diagonal (w_frac == t_frac) erased the tighten shock for all
            # subsequent wider tenors.  Diagonal scenarios remain all-zero.
            tenor_shock['shock'] = 0
            tenor_shock.loc[t_frac] = -shock
            tenor_shock.loc[w_frac] = 0 if t_ten == w_ten else shock
            # f is the shock amount interpolated based on tenor
            f = interp1d(np.hstack([0, year_frac]),
                         np.hstack([tenor_shock.shock.iloc[0],
                                    tenor_shock.shock]))
            for i, ind in enumerate(portf_copy.indices):
                ind.spread = max(0, portf.indices[i].spread +
                                 f((ind.end_date - value_date).days / 365))
            r.append([t_ten, w_ten] + [portf_copy.pv - portf.pv])
    return pd.DataFrame.from_records(r, columns=['tighter', 'wider', 'pnl'])