diff options
Diffstat (limited to 'python/analytics')
| -rw-r--r-- | python/analytics/curve_trades.py | 62 | ||||
| -rw-r--r-- | python/analytics/index_data.py | 74 |
2 files changed, 86 insertions, 50 deletions
diff --git a/python/analytics/curve_trades.py b/python/analytics/curve_trades.py index 36ff319e..4fd7641e 100644 --- a/python/analytics/curve_trades.py +++ b/python/analytics/curve_trades.py @@ -20,13 +20,14 @@ dawndb = dbengine('dawndb') def on_the_run(index): r = serenitasdb.execute("SELECT max(series) FROM index_version WHERE index=%s", - (index,)) + (index,)) series, = r.fetchone() return series + def curve_spread_diff(index='IG', rolling=6, years=3, percentage=False, percentage_base='5yr'): otr = on_the_run(index) - ## look at spreads + # look at spreads df = get_index_quotes(index, list(range(otr - rolling, otr + 1)), tenor=['3yr', '5yr', '7yr', '10yr'], years=years) spreads = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1) @@ -35,18 +36,21 @@ def curve_spread_diff(index='IG', rolling=6, years=3, percentage=False, percenta spreads_diff.columns = ['3-5', '5-7', '7-10'] spreads_diff['5-10'] = spreads_diff['5-7'] + spreads_diff['7-10'] if percentage is True: - spreads_diff = spreads.apply(lambda df: df/df[percentage_base], axis = 1) + spreads_diff = spreads.apply(lambda df: df/df[percentage_base], axis=1) return spreads_diff + def spreads_diff_table(spreads_diff): def current(s): return s.iat[-1] + def zscore(s): return (s.iat[-1] - s.mean()) / s.std() - df = spreads_diff.agg(['min', 'max','mean', current, zscore]) + df = spreads_diff.agg(['min', 'max', 'mean', current, zscore]) ((spreads_diff - spreads_diff.mean())/spreads_diff.std()).plot() return df + def theta_matrix_by_series(index='IG', rolling=6): otr = on_the_run(index) df = get_index_quotes(index, list(range(otr - rolling, otr + 1)), @@ -56,17 +60,20 @@ def theta_matrix_by_series(index='IG', rolling=6): theta_matrix = theta_matrix.loc[theta_matrix.index[-1][0]].unstack(0) return theta_matrix[['3yr', '5yr', '7yr', '10yr']] + def ratio_within_series(index='IG', rolling=6, param='duration'): otr = on_the_run(index) df = get_index_quotes(index, list(range(otr - rolling, otr + 1)), tenor=['3yr', '5yr', '7yr', '10yr']).unstack() ratio = (df[param]. apply(lambda s: s / df[param]['5yr'].values, raw=True)) - ratio.columns = pd.MultiIndex.from_product([[param + '_ratio_to_5yr'], ratio.columns]) + ratio.columns = pd.MultiIndex.from_product([[f"{param}_ratio_to_5yr"], + ratio.columns]) df = df.join(ratio).groupby(['date']).tail(1) df = df.reset_index(level=['index', 'version'], drop=True) return df + def on_the_run_theta(index='IG', rolling=6): otr = on_the_run(index) df = get_index_quotes(index, list(range(otr - rolling, otr + 1)), @@ -75,12 +82,13 @@ def on_the_run_theta(index='IG', rolling=6): theta_matrix = df.groupby(level=['date', 'tenor']).nth(-1)['theta_per_dur'] theta_matrix.unstack(-1).plot() + def curve_returns(index='IG', rolling=6): - ## look at returns + # look at returns otr = on_the_run(index) df = index_returns(index=index, series=list(range(otr - rolling, otr + 1)), tenor=['3yr', '5yr', '7yr', '10yr']) - ## on-the-run returns + # on-the-run returns df = df.reset_index().set_index(['date', 'series', 'tenor']) returns = df.price_return.dropna().unstack(-1).groupby(level='date').nth(-1) @@ -103,26 +111,28 @@ def curve_returns(index='IG', rolling=6): results = strategies_return.agg([sharpe, lambda df: df.nsmallest(10).mean()]) sharpe_monthly = strategies_return_monthly.agg(sharpe, period="monthly") sharpe_monthly.name = 'Monthly Sharpe' - results.index=['Sharpe', 'Mean Worst 10 Days DrawDown'] + results.index = ['Sharpe', 'Mean Worst 10 Days DrawDown'] return results.append(sharpe_monthly) + def cross_series_curve(index='IG', rolling=6): otr = on_the_run(index) df = index_returns(index= index, series=list(range(otr - rolling, otr + 1)), tenor=['3yr', '5yr', '7yr', '10yr']) - ## look cross series - 3y to 5y + # look cross series - 3y to 5y df = df.reset_index().set_index(['date', 'index', 'tenor', 'series']) - returns1 = df.xs(['5yr', index], level = ['tenor','index']).price_return.unstack(-1) + returns1 = df.xs(['5yr', index], level=['tenor','index']).price_return.unstack(-1) price_diff = pd.DataFrame() for ind in list(range(otr - 2, otr + 1)): price_diff[ind] = returns1[ind] - 1.6 * returns1[ind - 4] - price_diff = price_diff.stack().groupby(level = 'date').nth(-1) + price_diff = price_diff.stack().groupby(level='date').nth(-1) monthly_returns_cross_series = (price_diff. groupby(pd.Grouper(freq='M')). agg(lambda df: (1 + df).prod() - 1)) plt.plot(monthly_returns_cross_series) + def forward_loss(index='IG'): start_date = (pd.Timestamp.now() - pd.DateOffset(years=3)).date() @@ -140,6 +150,7 @@ def forward_loss(index='IG'): # annual change, to take out some noise df['fwd_loss_rate'] = df.indexel.diff(2)/df.duration.diff(2) + def curve_model(tenor_1='5yr', tenor_2='10yr'): #OLS model df = ratio_within_series(param='closespread') @@ -155,6 +166,7 @@ def curve_model(tenor_1='5yr', tenor_2='10yr'): data=df).fit() return df, ols_model + def curve_model_results(df, model): df = df.dropna() prstd_ols, df['down_2_stdev'], df['up_2_stdev'] = wls_prediction_std(model) @@ -168,9 +180,10 @@ def curve_model_results(df, model): df['dr_dspread'] = np.exp(model.params[0]) * model.params[2] * df.duration1 ** model.params[1] * df.closespread ** (model.params[2] - 1) return df + def spread_fin_crisis(index='IG'): otr = on_the_run(index) - ## look at spreads + # look at spreads df = get_index_quotes(index, list(range(8, otr + 1)), tenor=['3yr', '5yr', '7yr', '10yr'], years=20) spreads = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1) @@ -192,6 +205,7 @@ def spread_fin_crisis(index='IG'): plt.show() + def forward_spread(report_date, index='IG', series=None, tenors=['3yr', '5yr', '7yr', '10yr']): if series is None: @@ -206,6 +220,7 @@ def forward_spread(report_date, index='IG', series=None, tenors=['3yr', '5yr', ' f_spread.append(b_index.spread()) return pd.concat(f_spread, keys=date_range).unstack(-1) + def spot_forward(index='IG', series=None, tenors=['3yr', '5yr', '7yr', '10yr']): ''' @@ -231,6 +246,7 @@ def spot_forward(index='IG', series=None, tenors=['3yr', '5yr', '7yr', '10yr']): df['maturity'] = [b_index.value_date, maturity_1yr] + b_index.maturities return df.reset_index().set_index('maturity') + def curve_pos(value_date, index='IG'): ''' @@ -255,22 +271,26 @@ def curve_pos(value_date, index='IG'): portf.mark() return portf + def curve_shape(value_date, index='IG', percentile=.95, spread=None): ''' - Returns a function to linearly interpolate between the curve based on maturity (in years)''' + Returns a function to linearly interpolate between the curve + based on maturity (in years)''' curve_shape = curve_spread_diff(index, 10, 5, True) steepness = (curve_shape['10yr']/curve_shape['3yr']) series = on_the_run(index) if spread is None: - sql_string = "SELECT closespread FROM index_quotes where index = %s and series = %s and tenor = %s and date = %s" + sql_string = "SELECT closespread FROM index_quotes where index = %s " \ + "and series = %s and tenor = %s and date = %s" spread_df = pd.read_sql_query(sql_string, serenitasdb, - params=[index, series, '5yr', value_date.date()]) + params=[index, series, '5yr', value_date]) spread = spread_df.iloc[0][0] sql_string = "SELECT tenor, maturity FROM index_maturity where index = %s and series = %s" - lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'], params=[index, series]) + lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'], + params=[index, series]) df = curve_shape[steepness == steepness.quantile(percentile, 'nearest')] df = df * spread/df['5yr'][0] @@ -279,10 +299,12 @@ def curve_shape(value_date, index='IG', percentile=.95, spread=None): df['year_frac'] = (df.maturity - pd.to_datetime(value_date)).dt.days/365 return interp1d(np.hstack([0, df.year_frac]), np.hstack([0, df.spread])) + def pos_pnl_abs(portf, value_date, index='IG', rolling=6, years=3): ''' - Runs PNL analysis on portf using historical on-the-run spread levels - off-the-runs spreads are duration linearly interpolated''' + Runs PNL analysis on portf using historical on-the-run spread levels - + off-the-runs spreads are duration linearly interpolated''' series = on_the_run(index) df = get_index_quotes(index, list(range(series - rolling, series + 1)), @@ -290,7 +312,8 @@ def pos_pnl_abs(portf, value_date, index='IG', rolling=6, years=3): df = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1) sql_string = "SELECT tenor, maturity FROM index_maturity where index = %s and series = %s" - lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'], params=[index, series]) + lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'], + params=[index, series]) lookup_table['year_frac'] = (lookup_table.maturity - pd.to_datetime(value_date)).dt.days/365 portf_copy = deepcopy(portf) @@ -305,6 +328,7 @@ def pos_pnl_abs(portf, value_date, index='IG', rolling=6, years=3): df = pd.DataFrame.from_records(chain(*r), columns=['date', 'five_yr_spread', 'pnl']) return df.set_index('date') + def curve_scen_table(portf, shock=10): ''' Runs PNL scenario on portf by shocking different points on the curve. @@ -318,7 +342,7 @@ def curve_scen_table(portf, shock=10): shocks = np.full(4, 0) shocks[i+1] += shock shocks[j+1] -= shock - #f is the shock amount interpolated based on tenor + # f is the shock amount interpolated based on tenor f = interp1d(np.hstack([0, otr_year_frac]), shocks) portf_copy = deepcopy(portf) portf_copy.reset_pv() diff --git a/python/analytics/index_data.py b/python/analytics/index_data.py index 5cfb490a..439f80fc 100644 --- a/python/analytics/index_data.py +++ b/python/analytics/index_data.py @@ -2,8 +2,7 @@ from .db import _engine, dbconn from dates import bond_cal import numpy as np -from .utils import roll_date, previous_twentieth, tenor_t -from pandas.tseries.offsets import BDay +from .utils import tenor_t from functools import lru_cache from pyisda.curve import SpreadCurve from multiprocessing import Pool @@ -12,10 +11,11 @@ from yieldcurve import get_curve import datetime import pandas as pd + def insert_quotes(): """ - backpopulate some version i+1 quotes one day before they start trading so that - we get continuous time series when we compute returns. + backpopulate some version i+1 quotes one day before they start trading so + that we get continuous time series when we compute returns. We can also do it in sql as follows: @@ -25,28 +25,29 @@ def insert_quotes(): WHERE index='HY' and series=23 and date='2017-02-02' """ - dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05','2015-06-23']) - df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " \ - "WHERE index='HY' AND tenor='5yr' " \ + dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05', '2015-06-23']) + df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " + "WHERE index='HY' AND tenor='5yr' " "ORDER BY date, series DESC, version DESC", _engine, parse_dates=['date'], index_col=['date']) df = df.loc[dates] for tup in df.itertuples(): - result = serenitasdb.execute("SELECT indexfactor, cumulativeloss FROM index_version " \ - "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" \ - "ORDER BY version", - (tup.series, tup.version, tup.version+1)) + result = _engine.execute("SELECT indexfactor, cumulativeloss FROM index_version " + "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" + "ORDER BY version", + (tup.series, tup.version, tup.version+1)) factor1, cumloss1 = result.fetchone() factor2, cumloss2 = result.fetchone() recovery = 1-(cumloss2-cumloss1) version2_price = (factor1 * tup.closeprice - 100*recovery)/factor2 print(version2_price) - _engine.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" \ - "VALUES(%s, %s, %s, %s, %s, %s)", - (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price)) + _engine.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" + "VALUES(%s, %s, %s, %s, %s, %s)", + (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price)) + -def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years=3, - remove_holidays=True): +def get_index_quotes(index=None, series=None, tenor=None, from_date=None, + years=3, remove_holidays=True): args = locals().copy() del args['remove_holidays'] if args['years'] is not None: @@ -55,6 +56,7 @@ def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years= if args['from_date']: args['date'] = args['from_date'] del args['from_date'] + def make_str(key, val): if isinstance(val, list): op = "IN" @@ -77,19 +79,21 @@ def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years= df = pd.read_sql_query(sql_str, _engine, parse_dates=['date'], index_col=['date', 'index', 'series', 'version'], - params = make_params(args)) + params=make_params(args)) df.tenor = df.tenor.astype(tenor_t) df = df.set_index('tenor', append=True) df.sort_index(inplace=True) - ## get rid of US holidays + # get rid of US holidays if remove_holidays: dates = df.index.levels[0] if index in ['IG', 'HY']: holidays = bond_cal().holidays(start=dates[0], end=dates[-1]) - df = df.loc(axis=0)[dates.difference(holidays),:,:] + df = df.loc(axis=0)[dates.difference(holidays), :, :] return df -def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, years=3, per=1): + +def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, + years=3, per=1): """computes daily spreads and price returns Parameters @@ -112,12 +116,13 @@ def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, df = get_index_quotes(index, series, tenor, from_date, years) df = (df. groupby(level=['index', 'series', 'tenor', 'version']) - [['closespread','closeprice']]. - pct_change(periods=per)) + [['closespread', 'closeprice']]. + pct_change(periods=per)) df.columns = ['spread_return', 'price_return'] df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(0) - coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM " \ - "index_maturity WHERE coupon is NOT NULL", _engine, + coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM " + "index_maturity WHERE coupon is NOT NULL", + _engine, index_col=['index', 'series', 'tenor']) df = df.reset_index('date').join(coupon_data).reset_index('tenor') df.tenor = df.tenor.astype(tenor_t) @@ -129,12 +134,14 @@ def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, df = df.drop(['day_frac', 'coupon', 'maturity'], axis=1) return df.set_index(['date'], append=True) + def get_singlenames_quotes(indexname, date): conn = dbconn('serenitasdb') with conn.cursor() as c: c.execute("SELECT * FROM curve_quotes(%s, %s)", vars=(indexname, date)) return [r for r in c] + def build_curve(r, tenors, currency="USD"): spread_curve = 1e-4 * np.array(r['spread_curve'], dtype='float') upfront_curve = 1e-2 * np.array(r['upfront_curve'], dtype='float') @@ -149,31 +156,36 @@ def build_curve(r, tenors, currency="USD"): return None return sc + def build_curves(quotes, args): return [build_curve(q, *args) for q in quotes if q is not None] + def build_curves_dist(quotes, args, workers=4): - ## about twice as fast as the non distributed version - ## non thread safe for some reason so need ProcessPool + # about twice as fast as the non distributed version + # non thread safe for some reason so need ProcessPool with Pool(workers) as pool: r = pool.starmap(build_curve, [(q, *args) for q in quotes], 30) return r + @lru_cache(maxsize=16) -def _get_singlenames_curves(index_type, series, trade_date): - tenors = np.array([0.5, 1, 2, 3, 4, 5, 7, 10]) +def _get_singlenames_curves(index_type, series, trade_date, tenors): sn_quotes = get_singlenames_quotes(f"{index_type.lower()}{series}", trade_date) currency = "EUR" if index_type in ["XO", "EU"] else "USD" args = (tenors, currency) return build_curves_dist(sn_quotes, args) -def get_singlenames_curves(index_type, series, trade_date): - tenors = np.array([0.5, 1, 2, 3, 4, 5, 7, 10]) + +def get_singlenames_curves(index_type, series, trade_date, + tenors=np.array([0.5, 1, 2, 3, 4, 5, 7, 10])): if isinstance(trade_date, pd.Timestamp): trade_date = trade_date.date() return _get_singlenames_curves(index_type, series, - min(datetime.date.today(), trade_date)) + min(datetime.date.today(), trade_date), + tenors) + def get_tranche_quotes(index_type, series, tenor, date=datetime.date.today()): conn = dbconn('serenitasdb') |
