from . import serenitas_engine, serenitas_pool
from dates import bond_cal
import numpy as np
from .utils import tenor_t
from functools import lru_cache
from pyisda.curve import SpreadCurve, Seniority, DocClause
from multiprocessing import Pool
from yieldcurve import get_curve
import datetime
import pandas as pd


def insert_quotes():
    """Backpopulate version i+1 quotes one day before they start trading.

    This gives us continuous time series when we compute returns.
    We can also do it in sql as follows:

    INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)
    SELECT date, index, series, version+1, tenor,
        (factor1*closeprice-100*0.355)/factor2
    FROM index_quotes
    WHERE index='HY' and series=23 and date='2017-02-02'
    """
    # Hard-coded dates for which the next index version needs a synthetic quote.
    dates = pd.DatetimeIndex(["2014-05-21", "2015-02-19", "2015-03-05", "2015-06-23"])
    df = pd.read_sql_query(
        "SELECT DISTINCT ON (date) * FROM index_quotes "
        "WHERE index='HY' AND tenor='5yr' "
        "ORDER BY date, series DESC, version DESC",
        serenitas_engine,  # fix: original used `_engine`, undefined in this module
        parse_dates=["date"],
        index_col=["date"],
    )
    df = df.loc[dates]
    for tup in df.itertuples():
        result = serenitas_engine.execute(
            "SELECT indexfactor, cumulativeloss FROM index_version "
            "WHERE index = 'HY' AND series=%s AND version in (%s, %s) "
            "ORDER BY version",
            (tup.series, tup.version, tup.version + 1),
        )
        factor1, cumloss1 = result.fetchone()
        factor2, cumloss2 = result.fetchone()
        # The cumulative-loss increment between the two versions implies the
        # recovery realized on the defaulted name.
        recovery = 1 - (cumloss2 - cumloss1)
        version2_price = (factor1 * tup.closeprice - 100 * recovery) / factor2
        print(version2_price)
        serenitas_engine.execute(
            "INSERT INTO index_quotes(date, index, series, version, tenor, closeprice) "
            "VALUES(%s, %s, %s, %s, %s, %s)",
            (tup.Index, "HY", tup.series, tup.version + 1, tup.tenor, version2_price),
        )


def get_index_quotes(
    index=None,
    series=None,
    tenor=None,
    from_date=None,
    years=3,
    remove_holidays=True,
    source="MKIT",
):
    """Load index quotes joined with risk measures from the database.

    Parameters
    ----------
    index : str or List[str], optional
        index type, e.g. 'IG', 'HY', 'EU', 'XO'
    series : int or List[int], optional
    tenor : str or List[str], optional
        tenor in years e.g: '3yr', '5yr'
    from_date : datetime.date, optional
        starting date; takes precedence over `years` when given
    years : int, optional
        how many years to go back starting from today (ignored if None)
    remove_holidays : bool, optional
        drop US bond-market holidays (only applied for 'IG'/'HY')
    source : str, optional
        quote source

    Returns
    -------
    pandas.DataFrame
        indexed by (date, index, series, version, tenor)
    """
    args = locals().copy()
    del args["remove_holidays"]
    # Collapse the years/from_date arguments into a single `date` lower bound;
    # keys left with value None are filtered out when the WHERE clause is built.
    if args["years"] is not None:
        args["date"] = (pd.Timestamp.now() - pd.DateOffset(years=years)).date()
        del args["years"]
    if args["from_date"]:
        args["date"] = args["from_date"]
        del args["from_date"]

    def make_str(key, val):
        # list -> IN clause; dates act as lower bounds; everything else equality
        if isinstance(val, list):
            return "{} IN %({})s".format(key, key)
        op = ">=" if isinstance(val, datetime.date) else "="
        return "{} {} %({})s".format(key, op, key)

    where_clause = " AND ".join(
        make_str(k, v) for k, v in args.items() if v is not None
    )
    sql_str = "SELECT * FROM index_quotes_pre LEFT JOIN index_risk2 USING (id)"
    if where_clause:
        sql_str = " WHERE ".join([sql_str, where_clause])

    def make_params(args):
        # the SQL driver needs tuples (not lists) for IN parameters
        return {
            k: tuple(v) if isinstance(v, list) else v
            for k, v in args.items()
            if v is not None
        }

    df = pd.read_sql_query(
        sql_str,
        serenitas_engine,
        parse_dates=["date"],
        index_col=["date", "index", "series", "version"],
        params=make_params(args),
    )
    df["tenor"] = df["tenor"].astype(tenor_t)
    df = df.set_index("tenor", append=True)
    df.sort_index(inplace=True)
    # get rid of US holidays
    if remove_holidays:
        dates = df.index.levels[0]
        if index in ["IG", "HY"]:
            holidays = bond_cal().holidays(start=dates[0], end=dates[-1])
            df = df.loc(axis=0)[dates.difference(holidays), :, :]
    return df


def index_returns(
    df=None, index=None, series=None, tenor=None, from_date=None, years=3, per=1
):
    """Compute spread and price returns.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        pre-fetched quotes; if None they are loaded via `get_index_quotes`
    index : str or List[str], optional
        index type, one of 'IG', 'HY', 'EU', 'XO'
    series : int or List[int], optional
    tenor : str or List[str], optional
        tenor in years e.g: '3yr', '5yr'
    from_date : datetime.date, optional
        starting date
    years : int, optional
        limits how many years we go back starting from today
    per : int, optional
        calculate returns across different time frames

    Returns
    -------
    pandas.DataFrame
        spread_return and carry-adjusted price_return, indexed by
        (index, series, tenor, date)
    """
    if df is None:
        df = get_index_quotes(index, series, tenor, from_date, years)
    version_levels = ["index", "series", "tenor", "version"]
    spread_return = df.groupby(level=version_levels).close_spread.pct_change(
        periods=per
    )
    price_return = df.groupby(level=version_levels).close_price.diff() / 100
    df = pd.concat(
        [spread_return, price_return], axis=1, keys=["spread_return", "price_return"]
    )
    # keep a single version per (date, index, series, tenor)
    df = df.groupby(level=["date", "index", "series", "tenor"]).nth(0)
    coupon_data = pd.read_sql_query(
        "SELECT index, series, tenor, coupon * 1e-4 AS coupon, "
        "maturity FROM "
        "index_maturity WHERE coupon is NOT NULL",
        serenitas_engine,
        index_col=["index", "series", "tenor"],
    )
    df = df.reset_index("date").join(coupon_data).reset_index("tenor")
    # for some reason pandas doesn't keep the categories, so we have to
    # do this little dance
    df["tenor"] = df["tenor"].astype(tenor_t)
    df = df.set_index("tenor", append=True)
    # accrued carry between consecutive quote dates (Act/360)
    df["day_frac"] = df.groupby(level=["index", "series", "tenor"])["date"].transform(
        lambda s: s.diff().astype("timedelta64[D]") / 360
    )
    df["price_return"] += df.day_frac * df.coupon
    df = df.drop(["day_frac", "coupon", "maturity"], axis=1)
    return df.set_index(["date"], append=True)


def get_singlenames_quotes(indexname, date, tenors):
    """Fetch single-name curve quote rows via the `curve_quotes2` SQL function."""
    r = serenitas_engine.execute(
        "SELECT * FROM curve_quotes2(%s, %s, %s)", (indexname, date, list(tenors))
    )
    return list(r)


def build_curve(r, tenors):
    """Build a SpreadCurve from one single-name quote row.

    Parameters
    ----------
    r : mapping-like row with date, currency, spread/upfront/recovery curves,
        ticker, seniority, doc clause, event date and index weight
    tenors : numpy.ndarray of curve tenors in years

    Returns
    -------
    (weight, SpreadCurve or None)
        None when the curve fails to calibrate.

    Raises
    ------
    ValueError
        when the quote row carries no date (curve missing).
    """
    if r["date"] is None:
        raise ValueError(f"Curve for {r['cds_ticker']} is missing")
    spread_curve = 1e-4 * np.array(r["spread_curve"], dtype="float")  # bp -> decimal
    upfront_curve = 1e-2 * np.array(r["upfront_curve"], dtype="float")  # pct -> decimal
    recovery_curve = np.array(r["recovery_curve"], dtype="float")
    yc = get_curve(r["date"], r["currency"])
    try:
        sc = SpreadCurve(
            r["date"],
            yc,
            None,
            None,
            None,
            tenors,
            spread_curve,
            upfront_curve,
            recovery_curve,
            ticker=r["cds_ticker"],
            seniority=Seniority[r["seniority"]],
            doc_clause=DocClause[r["doc_clause"]],
            defaulted=r["event_date"],
        )
    except ValueError as e:
        print(r[0], e)
        return r["weight"], None
    return r["weight"], sc


def build_curves(quotes, args):
    """Build curves sequentially, skipping missing quote rows."""
    return [build_curve(q, *args) for q in quotes if q is not None]


def build_curves_dist(quotes, args, workers=4):
    """Distributed variant of `build_curves`.

    NOTE: about twice as *slow* as the non distributed version;
    non thread safe for some reason so need ProcessPool.
    """
    with Pool(workers) as pool:
        r = pool.starmap(build_curve, [(q, *args) for q in quotes], 30)
    return r


@lru_cache(maxsize=16)
def _get_singlenames_curves(index_type, series, trade_date, tenors):
    # Cached worker: all arguments must be hashable (tenors is a tuple).
    sn_quotes = get_singlenames_quotes(
        f"{index_type.lower()}{series}", trade_date, tenors
    )
    args = (np.array(tenors, dtype="float"),)
    return build_curves(sn_quotes, args)


def get_singlenames_curves(
    index_type, series, trade_date, tenors=(0.5, 1, 2, 3, 4, 5, 7, 10)
):
    """Return calibrated single-name curves for an index.

    tenors need to be a subset of (0.5, 1, 2, 3, 4, 5, 7, 10)
    """
    if isinstance(trade_date, pd.Timestamp):
        trade_date = trade_date.date()
    # Clamp future dates to today so equivalent requests share a cache entry.
    return _get_singlenames_curves(
        index_type, series, min(datetime.date.today(), trade_date), tenors
    )


def get_tranche_quotes(index_type, series, tenor, date=None):
    """Fetch tranche quotes via the `get_tranche_quotes` stored procedure.

    `date` defaults to today *at call time*; the original default
    `date=datetime.date.today()` was evaluated once at import time, so a
    long-running process would silently keep querying a stale date.
    The pooled connection is always returned, even if the query raises.
    """
    if date is None:
        date = datetime.date.today()
    conn = serenitas_pool.getconn()
    try:
        with conn.cursor() as c:
            c.callproc("get_tranche_quotes", (index_type, series, tenor, date))
            df = pd.DataFrame.from_records(dict(d) for d in c)
    finally:
        serenitas_pool.putconn(conn)
    return df