diff options
Diffstat (limited to 'python/analytics/index_data.py')
| -rw-r--r-- | python/analytics/index_data.py | 74 |
1 files changed, 43 insertions, 31 deletions
diff --git a/python/analytics/index_data.py b/python/analytics/index_data.py index 5cfb490a..439f80fc 100644 --- a/python/analytics/index_data.py +++ b/python/analytics/index_data.py @@ -2,8 +2,7 @@ from .db import _engine, dbconn from dates import bond_cal import numpy as np -from .utils import roll_date, previous_twentieth, tenor_t -from pandas.tseries.offsets import BDay +from .utils import tenor_t from functools import lru_cache from pyisda.curve import SpreadCurve from multiprocessing import Pool @@ -12,10 +11,11 @@ from yieldcurve import get_curve import datetime import pandas as pd + def insert_quotes(): """ - backpopulate some version i+1 quotes one day before they start trading so that - we get continuous time series when we compute returns. + backpopulate some version i+1 quotes one day before they start trading so + that we get continuous time series when we compute returns. We can also do it in sql as follows: @@ -25,28 +25,29 @@ def insert_quotes(): WHERE index='HY' and series=23 and date='2017-02-02' """ - dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05','2015-06-23']) - df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " \ - "WHERE index='HY' AND tenor='5yr' " \ + dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05', '2015-06-23']) + df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " + "WHERE index='HY' AND tenor='5yr' " "ORDER BY date, series DESC, version DESC", _engine, parse_dates=['date'], index_col=['date']) df = df.loc[dates] for tup in df.itertuples(): - result = serenitasdb.execute("SELECT indexfactor, cumulativeloss FROM index_version " \ - "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" \ - "ORDER BY version", - (tup.series, tup.version, tup.version+1)) + result = _engine.execute("SELECT indexfactor, cumulativeloss FROM index_version " + "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" + "ORDER BY version", + (tup.series, tup.version, tup.version+1)) factor1, cumloss1 = result.fetchone() factor2, cumloss2 = result.fetchone() recovery = 1-(cumloss2-cumloss1) version2_price = (factor1 * tup.closeprice - 100*recovery)/factor2 print(version2_price) - _engine.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" \ - "VALUES(%s, %s, %s, %s, %s, %s)", - (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price)) + _engine.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" + "VALUES(%s, %s, %s, %s, %s, %s)", + (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price)) + -def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years=3, - remove_holidays=True): +def get_index_quotes(index=None, series=None, tenor=None, from_date=None, + years=3, remove_holidays=True): args = locals().copy() del args['remove_holidays'] if args['years'] is not None: @@ -55,6 +56,7 @@ def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years= if args['from_date']: args['date'] = args['from_date'] del args['from_date'] + def make_str(key, val): if isinstance(val, list): op = "IN" @@ -77,19 +79,21 @@ def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years= df = pd.read_sql_query(sql_str, _engine, parse_dates=['date'], index_col=['date', 'index', 'series', 'version'], - params = make_params(args)) + params=make_params(args)) df.tenor = df.tenor.astype(tenor_t) df = df.set_index('tenor', append=True) df.sort_index(inplace=True) - ## get rid of US holidays + # get rid of US holidays if remove_holidays: dates = df.index.levels[0] if index in ['IG', 'HY']: holidays = bond_cal().holidays(start=dates[0], end=dates[-1]) - df = df.loc(axis=0)[dates.difference(holidays),:,:] + df = df.loc(axis=0)[dates.difference(holidays), :, :] return df -def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, years=3, per=1): + +def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, + years=3, per=1): """computes daily spreads and price returns Parameters @@ -112,12 +116,13 @@ def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, df = get_index_quotes(index, series, tenor, from_date, years) df = (df. groupby(level=['index', 'series', 'tenor', 'version']) - [['closespread','closeprice']]. - pct_change(periods=per)) + [['closespread', 'closeprice']]. + pct_change(periods=per)) df.columns = ['spread_return', 'price_return'] df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(0) - coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM " \ - "index_maturity WHERE coupon is NOT NULL", _engine, + coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM " + "index_maturity WHERE coupon is NOT NULL", + _engine, index_col=['index', 'series', 'tenor']) df = df.reset_index('date').join(coupon_data).reset_index('tenor') df.tenor = df.tenor.astype(tenor_t) @@ -129,12 +134,14 @@ def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, df = df.drop(['day_frac', 'coupon', 'maturity'], axis=1) return df.set_index(['date'], append=True) + def get_singlenames_quotes(indexname, date): conn = dbconn('serenitasdb') with conn.cursor() as c: c.execute("SELECT * FROM curve_quotes(%s, %s)", vars=(indexname, date)) return [r for r in c] + def build_curve(r, tenors, currency="USD"): spread_curve = 1e-4 * np.array(r['spread_curve'], dtype='float') upfront_curve = 1e-2 * np.array(r['upfront_curve'], dtype='float') @@ -149,31 +156,36 @@ def build_curve(r, tenors, currency="USD"): return None return sc + def build_curves(quotes, args): return [build_curve(q, *args) for q in quotes if q is not None] + def build_curves_dist(quotes, args, workers=4): - ## about twice as fast as the non distributed version - ## non thread safe for some reason so need ProcessPool + # about twice as fast as the non distributed version + # non thread safe for some reason so need ProcessPool with Pool(workers) as pool: r = pool.starmap(build_curve, [(q, *args) for q in quotes], 30) return r + @lru_cache(maxsize=16) -def _get_singlenames_curves(index_type, series, trade_date): - tenors = np.array([0.5, 1, 2, 3, 4, 5, 7, 10]) +def _get_singlenames_curves(index_type, series, trade_date, tenors): sn_quotes = get_singlenames_quotes(f"{index_type.lower()}{series}", trade_date) currency = "EUR" if index_type in ["XO", "EU"] else "USD" args = (tenors, currency) return build_curves_dist(sn_quotes, args) -def get_singlenames_curves(index_type, series, trade_date): - tenors = np.array([0.5, 1, 2, 3, 4, 5, 7, 10]) + +def get_singlenames_curves(index_type, series, trade_date, + tenors=np.array([0.5, 1, 2, 3, 4, 5, 7, 10])): if isinstance(trade_date, pd.Timestamp): trade_date = trade_date.date() return _get_singlenames_curves(index_type, series, - min(datetime.date.today(), trade_date)) + min(datetime.date.today(), trade_date), + tenors) + def get_tranche_quotes(index_type, series, tenor, date=datetime.date.today()): conn = dbconn('serenitasdb') |
