from db import dbengine, dbconn
from dates import bond_cal
import numpy as np
from analytics.utils import roll_date, previous_twentieth
from pandas.tseries.offsets import BDay
from pyisda.curve import SpreadCurve, fill_curve
from multiprocessing import Pool
from yieldcurve import get_curve
import datetime
import pandas as pd

_serenitas_engine = dbengine('serenitasdb')


def insert_quotes():
    """Backpopulate some version i+1 quotes one day before they start trading,
    so that we get continuous time series when we compute returns.

    We can also do it in SQL as follows:

    INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)
    SELECT date, index, series, version+1, tenor,
           (factor1*closeprice-100*0.355)/factor2
    FROM index_quotes
    WHERE index='HY' and series=23 and date='2017-02-02'
    """
    dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19',
                              '2015-03-05', '2015-06-23'])
    df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes "
                           "WHERE index='HY' AND tenor='5yr' "
                           "ORDER BY date, series DESC, version DESC",
                           _serenitas_engine, parse_dates=['date'],
                           index_col=['date'])
    df = df.loc[dates]
    for tup in df.itertuples():
        result = _serenitas_engine.execute(
            "SELECT indexfactor, cumulativeloss FROM index_version "
            "WHERE index = 'HY' AND series=%s AND version in (%s, %s) "
            "ORDER BY version",
            (tup.series, tup.version, tup.version + 1))
        factor1, cumloss1 = result.fetchone()
        factor2, cumloss2 = result.fetchone()
        # back out the recovery from the loss increment between versions
        recovery = 1 - (cumloss2 - cumloss1)
        version2_price = (factor1 * tup.closeprice - 100 * recovery) / factor2
        print(version2_price)
        _serenitas_engine.execute(
            "INSERT INTO index_quotes(date, index, series, version, tenor, closeprice) "
            "VALUES(%s, %s, %s, %s, %s, %s)",
            (tup.Index, 'HY', tup.series, tup.version + 1, tup.tenor,
             version2_price))


def get_index_quotes(index=None, series=None, tenor=None, from_date=None,
                     years=3):
    args = locals().copy()
    if args['years'] is not None:
        args['date'] = (pd.Timestamp.now() - pd.DateOffset(years=years)).date()
    del args['years']
    if args['from_date']:
        args['date'] = args['from_date']
    del args['from_date']

    def make_str(key, val):
        # lists become IN clauses, dates become lower bounds
        if isinstance(val, list):
            op = "IN"
        elif isinstance(val, datetime.date):
            op = ">="
        else:
            op = "="
        return "{} {} %({})s".format(key, op, key)

    where_clause = " AND ".join(make_str(k, v) for k, v in args.items()
                                if v is not None)
    sql_str = "SELECT * FROM index_quotes"
    if where_clause:
        sql_str = " WHERE ".join([sql_str, where_clause])

    def make_params(args):
        # the driver wants tuples, not lists, for IN parameters
        return {k: tuple(v) if isinstance(v, list) else v
                for k, v in args.items() if v is not None}

    df = pd.read_sql_query(sql_str, _serenitas_engine, parse_dates=['date'],
                           index_col=['date', 'index', 'series', 'version'],
                           params=make_params(args))
    tenor_dtype = pd.CategoricalDtype(("3yr", "5yr", "7yr", "10yr"),
                                      ordered=True)
    df.tenor = df.tenor.astype(tenor_dtype)
    df = df.set_index('tenor', append=True)
    df.sort_index(inplace=True)
    ## get rid of US holidays
    dates = df.index.levels[0]
    if index in ['IG', 'HY']:
        holidays = bond_cal().holidays(start=dates[0], end=dates[-1])
        df = df.loc(axis=0)[dates.difference(holidays), :, :]
    return df
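# Illustrative example (hypothetical arguments) of the query that
# get_index_quotes() assembles: get_index_quotes(index='HY',
# tenor=['3yr', '5yr'], years=3) produces
#
#   SELECT * FROM index_quotes WHERE index = %(index)s
#       AND tenor IN %(tenor)s AND date >= %(date)s
#
# with params {'index': 'HY', 'tenor': ('3yr', '5yr'), 'date': <today - 3y>};
# make_params converts the list to a tuple so the driver can expand the
# IN clause.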
def index_returns(df=None, index=None, series=None, tenor=None, from_date=None,
                  years=3, per=1):
    """Computes daily spread and price returns.

    Parameters
    ----------
    df : pandas.DataFrame, optional
    index : str or List[str], optional
        index type, one of 'IG', 'HY', 'EU', 'XO'
    series : int or List[int], optional
    tenor : str or List[str], optional
        tenor in years, e.g.: '3yr', '5yr'
    from_date : datetime.date, optional
        starting date
    years : int, optional
        limits how many years we go back starting from today
    per : int, optional
        calculate returns across different time frames
    """
    if df is None:
        df = get_index_quotes(index, series, tenor, from_date, years)
    # compute percentage changes version by version so that returns
    # never straddle a version roll
    df = (df.
          groupby(level=['index', 'series', 'tenor', 'version'])
          [['closespread', 'closeprice']].
          pct_change(periods=per))
    df.columns = ['spread_return', 'price_return']
    # keep one row per (date, index, series, tenor)
    df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(0)
    coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon FROM "
                                    "index_maturity WHERE coupon is NOT NULL",
                                    _serenitas_engine,
                                    index_col=['index', 'series', 'tenor'])

    def add_accrued(df):
        # add the coupon accrued (act/360) between observations on top of
        # the clean price return
        coupon = coupon_data.loc[df.index[0][1:], 'coupon'] * 1e-4
        accrued = (df.index.levels[0].to_series().diff(periods=per).
                   astype('timedelta64[D]') / 360 * coupon)
        return df + accrued

    df['price_return'] = (df.
                          groupby(level=['index', 'series', 'tenor'])['price_return'].
                          transform(add_accrued))
    return df


def get_singlenames_quotes(indexname, date):
    conn = dbconn('serenitasdb')
    with conn.cursor() as c:
        c.execute("SELECT * FROM curve_quotes(%s, %s)", vars=(indexname, date))
        return [r for r in c]


def build_curve(r, today_date, yc, start_date, step_in_date, value_date,
                end_dates):
    # quotes arrive as bp spreads and percentage upfronts; convert to
    # decimals, skipping the first pillar of each curve
    spread_curve = 1e-4 * np.array(r['spread_curve'][1:], dtype='float')
    upfront_curve = 1e-2 * np.array(r['upfront_curve'][1:], dtype='float')
    recovery_curve = np.array(r['recovery_curve'][1:], dtype='float')
    try:
        sc = SpreadCurve(today_date, yc, start_date, step_in_date, value_date,
                         end_dates, spread_curve, upfront_curve,
                         recovery_curve, ticker=r['cds_ticker'])
        if len(sc) != end_dates.shape[0]:
            sc = fill_curve(sc, end_dates)
    except ValueError as e:
        print(r[0], e)
        return None
    return sc


def build_curves(quotes, args):
    return [build_curve(q, *args) for q in quotes if q is not None]


def build_curves_dist(quotes, args, workers=4):
    ## about twice as fast as the non-distributed version;
    ## not thread safe for some reason, so we need a process pool
    with Pool(workers) as pool:
        r = pool.starmap(build_curve, [(q, *args) for q in quotes], 30)
    return r


def get_singlenames_curves(index_type, series, trade_date):
    end_dates = roll_date(trade_date, [1, 2, 3, 4, 5, 7, 10], nd_array=True)
    sn_quotes = get_singlenames_quotes(
        "{}{}".format(index_type.lower(), series), trade_date.date())
    jp_yc = get_curve(trade_date)
    start_date = previous_twentieth(trade_date)
    step_in_date = trade_date + datetime.timedelta(days=1)
    value_date = pd.Timestamp(trade_date) + 3 * BDay()
    args = (trade_date, jp_yc, start_date, step_in_date, value_date, end_dates)
    curves = build_curves_dist(sn_quotes, args)
    return curves, args
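
if __name__ == '__main__':
    # Usage sketch, not part of the library proper; the index name, series,
    # and trade date below are illustrative and assume the serenitasdb
    # tables and market-data services imported above are reachable.
    rets = index_returns(index='HY', tenor='5yr', years=3)
    print(rets.tail())

    trade_date = pd.Timestamp('2017-02-02')  # hypothetical trade date
    curves, curve_args = get_singlenames_curves('HY', 23, trade_date)
    print(sum(c is not None for c in curves), "single-name curves built")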