Diffstat (limited to 'python/analytics')
-rw-r--r--  python/analytics/curve_trades.py  62
-rw-r--r--  python/analytics/index_data.py    74
2 files changed, 86 insertions(+), 50 deletions(-)
diff --git a/python/analytics/curve_trades.py b/python/analytics/curve_trades.py
index 36ff319e..4fd7641e 100644
--- a/python/analytics/curve_trades.py
+++ b/python/analytics/curve_trades.py
@@ -20,13 +20,14 @@ dawndb = dbengine('dawndb')
def on_the_run(index):
r = serenitasdb.execute("SELECT max(series) FROM index_version WHERE index=%s",
- (index,))
+ (index,))
series, = r.fetchone()
return series
+
def curve_spread_diff(index='IG', rolling=6, years=3, percentage=False, percentage_base='5yr'):
otr = on_the_run(index)
- ## look at spreads
+ # look at spreads
df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
tenor=['3yr', '5yr', '7yr', '10yr'], years=years)
spreads = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1)
@@ -35,18 +36,21 @@ def curve_spread_diff(index='IG', rolling=6, years=3, percentage=False, percenta
spreads_diff.columns = ['3-5', '5-7', '7-10']
spreads_diff['5-10'] = spreads_diff['5-7'] + spreads_diff['7-10']
if percentage is True:
- spreads_diff = spreads.apply(lambda df: df/df[percentage_base], axis = 1)
+ spreads_diff = spreads.apply(lambda df: df/df[percentage_base], axis=1)
return spreads_diff
+
def spreads_diff_table(spreads_diff):
def current(s):
return s.iat[-1]
+
def zscore(s):
return (s.iat[-1] - s.mean()) / s.std()
- df = spreads_diff.agg(['min', 'max','mean', current, zscore])
+ df = spreads_diff.agg(['min', 'max', 'mean', current, zscore])
((spreads_diff - spreads_diff.mean())/spreads_diff.std()).plot()
return df
+
def theta_matrix_by_series(index='IG', rolling=6):
otr = on_the_run(index)
df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
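
Review note on the hunk above: the tenor-spread differentials and the z-score in spreads_diff_table reduce to a few pandas operations. A minimal self-contained sketch with made-up spread levels (the real data comes from get_index_quotes; the differential construction is an assumption read off the visible lines):

import pandas as pd

# Hypothetical close spreads (bp) by tenor, standing in for the
# get_index_quotes(...) result unstacked by tenor.
spreads = pd.DataFrame(
    {'3yr': [30.0, 32.0, 31.0],
     '5yr': [55.0, 58.0, 56.0],
     '7yr': [75.0, 79.0, 76.0],
     '10yr': [95.0, 99.0, 97.0]},
    index=pd.date_range('2019-01-02', periods=3, freq='B'))

# Adjacent-tenor differentials, then the 5-10 composite as above.
spreads_diff = spreads.diff(axis=1).iloc[:, 1:]
spreads_diff.columns = ['3-5', '5-7', '7-10']
spreads_diff['5-10'] = spreads_diff['5-7'] + spreads_diff['7-10']

# z-score of the latest observation, as in spreads_diff_table.
zscore = (spreads_diff.iloc[-1] - spreads_diff.mean()) / spreads_diff.std()
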
@@ -56,17 +60,20 @@ def theta_matrix_by_series(index='IG', rolling=6):
theta_matrix = theta_matrix.loc[theta_matrix.index[-1][0]].unstack(0)
return theta_matrix[['3yr', '5yr', '7yr', '10yr']]
+
def ratio_within_series(index='IG', rolling=6, param='duration'):
otr = on_the_run(index)
df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
tenor=['3yr', '5yr', '7yr', '10yr']).unstack()
ratio = (df[param].
apply(lambda s: s / df[param]['5yr'].values, raw=True))
- ratio.columns = pd.MultiIndex.from_product([[param + '_ratio_to_5yr'], ratio.columns])
+ ratio.columns = pd.MultiIndex.from_product([[f"{param}_ratio_to_5yr"],
+ ratio.columns])
df = df.join(ratio).groupby(['date']).tail(1)
df = df.reset_index(level=['index', 'version'], drop=True)
return df
+
def on_the_run_theta(index='IG', rolling=6):
otr = on_the_run(index)
df = get_index_quotes(index, list(range(otr - rolling, otr + 1)),
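
Review note on ratio_within_series above: with raw=True, apply hands each tenor column to the lambda as an ndarray, so every column gets divided elementwise by the 5yr values. A hedged sketch with hypothetical durations:

import pandas as pd

# Hypothetical durations by tenor (stands in for df['duration']).
dur = pd.DataFrame({'3yr': [2.8, 2.9], '5yr': [4.5, 4.6],
                    '7yr': [6.0, 6.1], '10yr': [8.0, 8.1]})

# Same apply(..., raw=True) pattern as the function above.
ratio = dur.apply(lambda s: s / dur['5yr'].values, raw=True)
ratio.columns = pd.MultiIndex.from_product([['duration_ratio_to_5yr'],
                                            ratio.columns])
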
@@ -75,12 +82,13 @@ def on_the_run_theta(index='IG', rolling=6):
theta_matrix = df.groupby(level=['date', 'tenor']).nth(-1)['theta_per_dur']
theta_matrix.unstack(-1).plot()
+
def curve_returns(index='IG', rolling=6):
- ## look at returns
+ # look at returns
otr = on_the_run(index)
df = index_returns(index=index, series=list(range(otr - rolling, otr + 1)),
tenor=['3yr', '5yr', '7yr', '10yr'])
- ## on-the-run returns
+ # on-the-run returns
df = df.reset_index().set_index(['date', 'series', 'tenor'])
returns = df.price_return.dropna().unstack(-1).groupby(level='date').nth(-1)
@@ -103,26 +111,28 @@ def curve_returns(index='IG', rolling=6):
results = strategies_return.agg([sharpe, lambda df: df.nsmallest(10).mean()])
sharpe_monthly = strategies_return_monthly.agg(sharpe, period="monthly")
sharpe_monthly.name = 'Monthly Sharpe'
- results.index=['Sharpe', 'Mean Worst 10 Days DrawDown']
+ results.index = ['Sharpe', 'Mean Worst 10 Days DrawDown']
return results.append(sharpe_monthly)
+
def cross_series_curve(index='IG', rolling=6):
otr = on_the_run(index)
df = index_returns(index= index, series=list(range(otr - rolling, otr + 1)),
tenor=['3yr', '5yr', '7yr', '10yr'])
- ## look cross series - 3y to 5y
+ # look cross series - 3y to 5y
df = df.reset_index().set_index(['date', 'index', 'tenor', 'series'])
- returns1 = df.xs(['5yr', index], level = ['tenor','index']).price_return.unstack(-1)
+ returns1 = df.xs(['5yr', index], level=['tenor','index']).price_return.unstack(-1)
price_diff = pd.DataFrame()
for ind in list(range(otr - 2, otr + 1)):
price_diff[ind] = returns1[ind] - 1.6 * returns1[ind - 4]
- price_diff = price_diff.stack().groupby(level = 'date').nth(-1)
+ price_diff = price_diff.stack().groupby(level='date').nth(-1)
monthly_returns_cross_series = (price_diff.
groupby(pd.Grouper(freq='M')).
agg(lambda df: (1 + df).prod() - 1))
plt.plot(monthly_returns_cross_series)
+
def forward_loss(index='IG'):
start_date = (pd.Timestamp.now() - pd.DateOffset(years=3)).date()
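
Review note on cross_series_curve above: the trade is long the on-the-run 5yr return and short 1.6x the series rolled four earlier (roughly 3yr of remaining maturity); the fixed 1.6 looks like a duration hedge ratio between the two. A self-contained sketch of the compounding step, with simulated returns standing in for index_returns output:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
dates = pd.date_range('2019-01-01', periods=120, freq='B')
# Hypothetical daily price returns: on-the-run 5yr vs the series
# rolled four earlier (~3yr remaining maturity).
otr = pd.Series(rng.normal(0, 1e-3, len(dates)), index=dates)
old = pd.Series(rng.normal(0, 6e-4, len(dates)), index=dates)

# Duration-weighted long/short with the same 1.6 hedge ratio.
price_diff = otr - 1.6 * old

# Compound daily returns into calendar-month returns, mirroring
# the pd.Grouper(freq='M') aggregation above.
monthly = price_diff.groupby(pd.Grouper(freq='M')).agg(
    lambda s: (1 + s).prod() - 1)
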
@@ -140,6 +150,7 @@ def forward_loss(index='IG'):
# annual change, to take out some noise
df['fwd_loss_rate'] = df.indexel.diff(2)/df.duration.diff(2)
+
def curve_model(tenor_1='5yr', tenor_2='10yr'):
#OLS model
df = ratio_within_series(param='closespread')
@@ -155,6 +166,7 @@ def curve_model(tenor_1='5yr', tenor_2='10yr'):
data=df).fit()
return df, ols_model
+
def curve_model_results(df, model):
df = df.dropna()
prstd_ols, df['down_2_stdev'], df['up_2_stdev'] = wls_prediction_std(model)
@@ -168,9 +180,10 @@ def curve_model_results(df, model):
df['dr_dspread'] = np.exp(model.params[0]) * model.params[2] * df.duration1 ** model.params[1] * df.closespread ** (model.params[2] - 1)
return df
+
def spread_fin_crisis(index='IG'):
otr = on_the_run(index)
- ## look at spreads
+ # look at spreads
df = get_index_quotes(index, list(range(8, otr + 1)),
tenor=['3yr', '5yr', '7yr', '10yr'], years=20)
spreads = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1)
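
Review note on curve_model_results: the dr_dspread expression is the analytic partial derivative, with respect to closespread, of the implied power-law fit r = exp(b0) * duration1**b1 * closespread**b2, reading b0, b1, b2 off model.params. A quick numerical check with stand-in coefficients:

import numpy as np

# Stand-in fitted coefficients (hypothetical, not model.params).
b0, b1, b2 = -1.2, 0.8, 0.5
d, s = 4.5, 60.0          # duration1 and closespread

def fit(spread):
    # Implied power-law fit: r = exp(b0) * d**b1 * spread**b2
    return np.exp(b0) * d ** b1 * spread ** b2

analytic = np.exp(b0) * b2 * d ** b1 * s ** (b2 - 1)   # dr_dspread
numeric = (fit(s + 1e-6) - fit(s - 1e-6)) / 2e-6        # central diff
assert abs(analytic - numeric) < 1e-8
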
@@ -192,6 +205,7 @@ def spread_fin_crisis(index='IG'):
plt.show()
+
def forward_spread(report_date, index='IG', series=None, tenors=['3yr', '5yr', '7yr', '10yr']):
if series is None:
@@ -206,6 +220,7 @@ def forward_spread(report_date, index='IG', series=None, tenors=['3yr', '5yr', '
f_spread.append(b_index.spread())
return pd.concat(f_spread, keys=date_range).unstack(-1)
+
def spot_forward(index='IG', series=None, tenors=['3yr', '5yr', '7yr', '10yr']):
'''
@@ -231,6 +246,7 @@ def spot_forward(index='IG', series=None, tenors=['3yr', '5yr', '7yr', '10yr']):
df['maturity'] = [b_index.value_date, maturity_1yr] + b_index.maturities
return df.reset_index().set_index('maturity')
+
def curve_pos(value_date, index='IG'):
'''
@@ -255,22 +271,26 @@ def curve_pos(value_date, index='IG'):
portf.mark()
return portf
+
def curve_shape(value_date, index='IG', percentile=.95, spread=None):
'''
- Returns a function to linearly interpolate between the curve based on maturity (in years)'''
+ Returns a function to linearly interpolate between the curve
+ based on maturity (in years)'''
curve_shape = curve_spread_diff(index, 10, 5, True)
steepness = (curve_shape['10yr']/curve_shape['3yr'])
series = on_the_run(index)
if spread is None:
- sql_string = "SELECT closespread FROM index_quotes where index = %s and series = %s and tenor = %s and date = %s"
+ sql_string = "SELECT closespread FROM index_quotes where index = %s " \
+ "and series = %s and tenor = %s and date = %s"
spread_df = pd.read_sql_query(sql_string, serenitasdb,
- params=[index, series, '5yr', value_date.date()])
+ params=[index, series, '5yr', value_date])
spread = spread_df.iloc[0][0]
sql_string = "SELECT tenor, maturity FROM index_maturity where index = %s and series = %s"
- lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'], params=[index, series])
+ lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'],
+ params=[index, series])
df = curve_shape[steepness == steepness.quantile(percentile, 'nearest')]
df = df * spread/df['5yr'][0]
@@ -279,10 +299,12 @@ def curve_shape(value_date, index='IG', percentile=.95, spread=None):
df['year_frac'] = (df.maturity - pd.to_datetime(value_date)).dt.days/365
return interp1d(np.hstack([0, df.year_frac]), np.hstack([0, df.spread]))
+
def pos_pnl_abs(portf, value_date, index='IG', rolling=6, years=3):
'''
- Runs PNL analysis on portf using historical on-the-run spread levels - off-the-runs spreads are duration linearly interpolated'''
+ Runs PNL analysis on portf using historical on-the-run spread levels -
+ off-the-runs spreads are duration linearly interpolated'''
series = on_the_run(index)
df = get_index_quotes(index, list(range(series - rolling, series + 1)),
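
Review note: curve_shape hands back a scipy interp1d, i.e. a callable spread curve over year fractions, anchored at zero spread for zero maturity. A usage sketch with hypothetical pillar values:

import numpy as np
from scipy.interpolate import interp1d

# Hypothetical stressed pillar spreads (bp) at 3/5/7/10yr year
# fractions; the 0bp point anchors the curve at t=0, as above.
year_frac = np.array([2.7, 4.7, 6.7, 9.7])
spread = np.array([35.0, 60.0, 80.0, 100.0])
f = interp1d(np.hstack([0, year_frac]), np.hstack([0, spread]))

f(4.1)   # interpolated spread for a 4.1yr-equivalent position
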
@@ -290,7 +312,8 @@ def pos_pnl_abs(portf, value_date, index='IG', rolling=6, years=3):
df = df.groupby(level=['date', 'tenor']).nth(-1)['closespread'].unstack(-1)
sql_string = "SELECT tenor, maturity FROM index_maturity where index = %s and series = %s"
- lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'], params=[index, series])
+ lookup_table = pd.read_sql_query(sql_string, serenitasdb, parse_dates=['maturity'],
+ params=[index, series])
lookup_table['year_frac'] = (lookup_table.maturity - pd.to_datetime(value_date)).dt.days/365
portf_copy = deepcopy(portf)
@@ -305,6 +328,7 @@ def pos_pnl_abs(portf, value_date, index='IG', rolling=6, years=3):
df = pd.DataFrame.from_records(chain(*r), columns=['date', 'five_yr_spread', 'pnl'])
return df.set_index('date')
+
def curve_scen_table(portf, shock=10):
'''
Runs PNL scenario on portf by shocking different points on the curve.
@@ -318,7 +342,7 @@ def curve_scen_table(portf, shock=10):
shocks = np.full(4, 0)
shocks[i+1] += shock
shocks[j+1] -= shock
- #f is the shock amount interpolated based on tenor
+ # f is the shock amount interpolated based on tenor
f = interp1d(np.hstack([0, otr_year_frac]), shocks)
portf_copy = deepcopy(portf)
portf_copy.reset_pv()
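
Review note on curve_scen_table: the loop builds a per-pillar shock vector and interpolates it across maturities, so off-the-run positions pick up a pro-rated shock. A sketch with hypothetical pillars (the real otr_year_frac comes from index_maturity):

import numpy as np
from scipy.interpolate import interp1d

shock = 10                               # bp, as in the default
year_frac = np.array([4.7, 6.7, 9.7])    # hypothetical pillars

# Steepener scenario: +10bp on one pillar, -10bp on another, zero
# at the t=0 anchor, linear in between -- mirroring the loop above.
shocks = np.full(4, 0.0)
shocks[1] += shock
shocks[3] -= shock
f = interp1d(np.hstack([0, year_frac]), shocks)
f(5.5)   # pro-rated shock for a 5.5yr-equivalent position
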
diff --git a/python/analytics/index_data.py b/python/analytics/index_data.py
index 5cfb490a..439f80fc 100644
--- a/python/analytics/index_data.py
+++ b/python/analytics/index_data.py
@@ -2,8 +2,7 @@ from .db import _engine, dbconn
from dates import bond_cal
import numpy as np
-from .utils import roll_date, previous_twentieth, tenor_t
-from pandas.tseries.offsets import BDay
+from .utils import tenor_t
from functools import lru_cache
from pyisda.curve import SpreadCurve
from multiprocessing import Pool
@@ -12,10 +11,11 @@ from yieldcurve import get_curve
import datetime
import pandas as pd
+
def insert_quotes():
"""
- backpopulate some version i+1 quotes one day before they start trading so that
- we get continuous time series when we compute returns.
+ backpopulate some version i+1 quotes one day before they start trading so
+ that we get continuous time series when we compute returns.
We can also do it in sql as follows:
@@ -25,28 +25,29 @@ def insert_quotes():
WHERE index='HY' and series=23 and date='2017-02-02'
"""
- dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05','2015-06-23'])
- df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " \
- "WHERE index='HY' AND tenor='5yr' " \
+ dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05', '2015-06-23'])
+ df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes "
+ "WHERE index='HY' AND tenor='5yr' "
"ORDER BY date, series DESC, version DESC",
_engine, parse_dates=['date'], index_col=['date'])
df = df.loc[dates]
for tup in df.itertuples():
- result = serenitasdb.execute("SELECT indexfactor, cumulativeloss FROM index_version " \
- "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" \
- "ORDER BY version",
- (tup.series, tup.version, tup.version+1))
+ result = _engine.execute("SELECT indexfactor, cumulativeloss FROM index_version "
+ "WHERE index = 'HY' AND series=%s AND version in (%s, %s)"
+ "ORDER BY version",
+ (tup.series, tup.version, tup.version+1))
factor1, cumloss1 = result.fetchone()
factor2, cumloss2 = result.fetchone()
recovery = 1-(cumloss2-cumloss1)
version2_price = (factor1 * tup.closeprice - 100*recovery)/factor2
print(version2_price)
- _engine.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" \
- "VALUES(%s, %s, %s, %s, %s, %s)",
- (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price))
+ _engine.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)"
+ "VALUES(%s, %s, %s, %s, %s, %s)",
+ (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price))
+
-def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years=3,
- remove_holidays=True):
+def get_index_quotes(index=None, series=None, tenor=None, from_date=None,
+ years=3, remove_holidays=True):
args = locals().copy()
del args['remove_holidays']
if args['years'] is not None:
@@ -55,6 +56,7 @@ def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years=
if args['from_date']:
args['date'] = args['from_date']
del args['from_date']
+
def make_str(key, val):
if isinstance(val, list):
op = "IN"
@@ -77,19 +79,21 @@ def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years=
df = pd.read_sql_query(sql_str, _engine, parse_dates=['date'],
index_col=['date', 'index', 'series', 'version'],
- params = make_params(args))
+ params=make_params(args))
df.tenor = df.tenor.astype(tenor_t)
df = df.set_index('tenor', append=True)
df.sort_index(inplace=True)
- ## get rid of US holidays
+ # get rid of US holidays
if remove_holidays:
dates = df.index.levels[0]
if index in ['IG', 'HY']:
holidays = bond_cal().holidays(start=dates[0], end=dates[-1])
- df = df.loc(axis=0)[dates.difference(holidays),:,:]
+ df = df.loc(axis=0)[dates.difference(holidays), :, :]
return df
-def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, years=3, per=1):
+
+def index_returns(df=None, index=None, series=None, tenor=None, from_date=None,
+ years=3, per=1):
"""computes daily spreads and price returns
Parameters
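
Review note on the holiday filter above: dates is the first level of the MultiIndex, and loc(axis=0) with a per-level selector keeps every row whose date survives the difference. A self-contained sketch on a small three-level index:

import pandas as pd

idx = pd.MultiIndex.from_product(
    [pd.to_datetime(['2019-07-03', '2019-07-04', '2019-07-05']),
     ['IG'], [32]], names=['date', 'index', 'series'])
df = pd.DataFrame({'closespread': [55.0, 55.2, 54.8]}, index=idx)

dates = df.index.levels[0]
holidays = pd.to_datetime(['2019-07-04'])   # Independence Day
df = df.loc(axis=0)[dates.difference(holidays), :, :]
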
@@ -112,12 +116,13 @@ def index_returns(df=None, index=None, series=None, tenor=None, from_date=None,
df = get_index_quotes(index, series, tenor, from_date, years)
df = (df.
groupby(level=['index', 'series', 'tenor', 'version'])
- [['closespread','closeprice']].
- pct_change(periods=per))
+ [['closespread', 'closeprice']].
+ pct_change(periods=per))
df.columns = ['spread_return', 'price_return']
df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(0)
- coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM " \
- "index_maturity WHERE coupon is NOT NULL", _engine,
+ coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM "
+ "index_maturity WHERE coupon is NOT NULL",
+ _engine,
index_col=['index', 'series', 'tenor'])
df = df.reset_index('date').join(coupon_data).reset_index('tenor')
df.tenor = df.tenor.astype(tenor_t)
@@ -129,12 +134,14 @@ def index_returns(df=None, index=None, series=None, tenor=None, from_date=None,
df = df.drop(['day_frac', 'coupon', 'maturity'], axis=1)
return df.set_index(['date'], append=True)
+
def get_singlenames_quotes(indexname, date):
conn = dbconn('serenitasdb')
with conn.cursor() as c:
c.execute("SELECT * FROM curve_quotes(%s, %s)", vars=(indexname, date))
return [r for r in c]
+
def build_curve(r, tenors, currency="USD"):
spread_curve = 1e-4 * np.array(r['spread_curve'], dtype='float')
upfront_curve = 1e-2 * np.array(r['upfront_curve'], dtype='float')
@@ -149,31 +156,36 @@ def build_curve(r, tenors, currency="USD"):
return None
return sc
+
def build_curves(quotes, args):
return [build_curve(q, *args) for q in quotes if q is not None]
+
def build_curves_dist(quotes, args, workers=4):
- ## about twice as fast as the non distributed version
- ## non thread safe for some reason so need ProcessPool
+ # about twice as fast as the non distributed version
+ # non thread safe for some reason so need ProcessPool
with Pool(workers) as pool:
r = pool.starmap(build_curve, [(q, *args) for q in quotes], 30)
return r
+
@lru_cache(maxsize=16)
-def _get_singlenames_curves(index_type, series, trade_date):
- tenors = np.array([0.5, 1, 2, 3, 4, 5, 7, 10])
+def _get_singlenames_curves(index_type, series, trade_date, tenors):
sn_quotes = get_singlenames_quotes(f"{index_type.lower()}{series}",
trade_date)
currency = "EUR" if index_type in ["XO", "EU"] else "USD"
args = (tenors, currency)
return build_curves_dist(sn_quotes, args)
-def get_singlenames_curves(index_type, series, trade_date):
- tenors = np.array([0.5, 1, 2, 3, 4, 5, 7, 10])
+
+def get_singlenames_curves(index_type, series, trade_date,
+ tenors=np.array([0.5, 1, 2, 3, 4, 5, 7, 10])):
if isinstance(trade_date, pd.Timestamp):
trade_date = trade_date.date()
return _get_singlenames_curves(index_type, series,
- min(datetime.date.today(), trade_date))
+ min(datetime.date.today(), trade_date),
+ tenors)
+
def get_tranche_quotes(index_type, series, tenor, date=datetime.date.today()):
conn = dbconn('serenitasdb')
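
One flag on the _get_singlenames_curves change: lru_cache keys must be hashable, and the np.array default in get_singlenames_curves is forwarded straight into the cached function, which would raise TypeError: unhashable type: 'numpy.ndarray' on the first call. A possible fix (an assumption, not part of this commit) is to pass a tuple across the cache boundary and rebuild the array inside:

from functools import lru_cache
import numpy as np

@lru_cache(maxsize=16)
def _get_curves(index_type, series, trade_date, tenors):
    tenor_arr = np.asarray(tenors)   # rebuild the array inside
    return tenor_arr                 # placeholder for curve building

def get_curves(index_type, series, trade_date,
               tenors=(0.5, 1, 2, 3, 4, 5, 7, 10)):
    # tuple(tenors) keeps the cache key hashable (hypothetical names).
    return _get_curves(index_type, series, trade_date, tuple(tenors))

Separately, the date=datetime.date.today() default in get_tranche_quotes is evaluated once at import time, so a long-running process will keep passing the start-up date.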