Diffstat (limited to 'python/analytics/index_data.py')
| Mode | Path | Lines |
| --- | --- | --- |
| -rw-r--r-- | python/analytics/index_data.py | 347 |
1 file changed, 0 insertions, 347 deletions
diff --git a/python/analytics/index_data.py b/python/analytics/index_data.py
deleted file mode 100644
index 89c2754d..00000000
--- a/python/analytics/index_data.py
+++ /dev/null
@@ -1,347 +0,0 @@
-from . import serenitas_engine, serenitas_pool
-from dates import bond_cal
-import numpy as np
-
-from .utils import tenor_t, adjust_prev_business_day
-from dateutil.relativedelta import relativedelta
-from functools import lru_cache
-from pyisda.curve import SpreadCurve, Seniority, DocClause, YieldCurve
-from multiprocessing import Pool
-from yieldcurve import get_curve
-
-import datetime
-import pandas as pd
-
-
-def insert_quotes():
-    """
-    Backpopulate some version i+1 quotes one day before they start trading so
-    that we get continuous time series when we compute returns.
-
-    We can also do it in SQL as follows:
-
-    INSERT INTO index_quotes_pre(date, index, series, version, tenor, close_price, source)
-    SELECT date, index, series, version+1, tenor, (factor1*closeprice-100*0.355)/factor2, 'MKIT'
-    FROM index_quotes
-    WHERE index='HY' and series=23 and date='2017-02-02'
-
-    """
-    dates = pd.DatetimeIndex(["2014-05-21", "2015-02-19", "2015-03-05", "2015-06-23"])
-    df = pd.read_sql_query(
-        "SELECT DISTINCT ON (date) * FROM index_quotes "
-        "WHERE index='HY' AND tenor='5yr' "
-        "ORDER BY date, series DESC, version DESC",
-        serenitas_engine,
-        parse_dates=["date"],
-        index_col=["date"],
-    )
-    df = df.loc[dates]
-    for tup in df.itertuples():
-        result = serenitas_engine.execute(
-            "SELECT indexfactor, cumulativeloss FROM index_version "
-            "WHERE index = 'HY' AND series=%s AND version in (%s, %s)"
-            "ORDER BY version",
-            (tup.series, tup.version, tup.version + 1),
-        )
-        factor1, cumloss1 = result.fetchone()
-        factor2, cumloss2 = result.fetchone()
-        recovery = 1 - (cumloss2 - cumloss1)
-        version2_price = (factor1 * tup.closeprice - 100 * recovery) / factor2
-        print(version2_price)
-        serenitas_engine.execute(
-            "INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)"
-            "VALUES(%s, %s, %s, %s, %s, %s)",
-            (tup.Index, "HY", tup.series, tup.version + 1, tup.tenor, version2_price),
-        )
-
-
-def get_index_quotes(
-    index=None,
-    series=None,
-    tenor=None,
-    from_date=None,
-    end_date=None,
-    years=3,
-    remove_holidays=True,
-    source="MKIT",
-):
-    args = locals().copy()
-    del args["remove_holidays"]
-    if args["end_date"] is None:
-        args["end_date"] = datetime.date.today()
-    if args["years"] is not None:
-        args["from_date"] = args["end_date"] - relativedelta(years=years)
-    del args["years"]
-
-    def make_str(key, val):
-        col_key = key
-        if isinstance(val, list) or isinstance(val, tuple):
-            return "{} IN %({})s".format(key, key)
-        elif key == "from_date":
-            col_key = "date"
-            op = ">="
-        elif key == "end_date":
-            col_key = "date"
-            op = "<="
-        else:
-            op = "="
-        return "{} {} %({})s".format(col_key, op, key)
-
-    where_clause = " AND ".join(
-        make_str(k, v) for k, v in args.items() if v is not None
-    )
-    sql_str = "SELECT * FROM index_quotes_pre LEFT JOIN index_risk2 USING (id)"
-    if where_clause:
-        sql_str = " WHERE ".join([sql_str, where_clause])
-
-    def make_params(args):
-        return {
-            k: tuple(v) if isinstance(v, list) else v
-            for k, v in args.items()
-            if v is not None
-        }
-
-    df = pd.read_sql_query(
-        sql_str,
-        serenitas_engine,
-        parse_dates=["date"],
-        index_col=["date", "index", "series", "version"],
-        params=make_params(args),
-    )
-    df.tenor = df.tenor.astype(tenor_t)
-    df = df.set_index("tenor", append=True)
-    df.sort_index(inplace=True)
-    # get rid of US holidays
-    if remove_holidays:
-        dates = df.index.levels[0]
-        if index in ["IG", "HY"]:
-            holidays = bond_cal().holidays(start=dates[0], end=dates[-1])
-            df = df.loc(axis=0)[dates.difference(holidays), :, :]
-    return df
-
-
-def index_returns(
-    df=None,
-    index=None,
-    series=None,
-    tenor=None,
-    from_date=None,
-    end_date=None,
-    years=3,
-    per=1,
-):
-    """computes spread and price returns
-
-    Parameters
-    ----------
-    df : pandas.DataFrame, optional
-        pre-fetched quotes; if None they are loaded via get_index_quotes
-    index : str or List[str], optional
-        index type, one of 'IG', 'HY', 'EU', 'XO'
-    series : int or List[int], optional
-    tenor : str or List[str], optional
-        tenor in years e.g: '3yr', '5yr'
-    from_date : datetime.date, optional
-        starting date
-    end_date : datetime.date, optional
-        ending date
-    years : int, optional
-        limits how many years we go back starting from today.
-    per : int, optional
-        number of periods over which returns are computed
-
-    """
-    if df is None:
-        df = get_index_quotes(index, series, tenor, from_date, end_date, years)
-    spread_return = df.groupby(
-        level=["index", "series", "tenor", "version"]
-    ).close_spread.pct_change(periods=per)
-    price_return = (
-        df.groupby(level=["index", "series", "tenor", "version"]).close_price.diff()
-        / 100
-    )
-    df = pd.concat(
-        [spread_return, price_return], axis=1, keys=["spread_return", "price_return"]
-    )
-    df = df.groupby(level=["date", "index", "series", "tenor"]).nth(0)
-    coupon_data = pd.read_sql_query(
-        "SELECT index, series, tenor, coupon * 1e-4 AS coupon, "
-        "maturity FROM "
-        "index_maturity WHERE coupon is NOT NULL",
-        serenitas_engine,
-        index_col=["index", "series", "tenor"],
-    )
-    df = df.reset_index("date").join(coupon_data).reset_index("tenor")
-    # for some reason pandas doesn't keep the categories, so we have to
-    # do this little dance
-    df.tenor = df.tenor.astype(tenor_t)
-    df = df.set_index("tenor", append=True)
-    df["day_frac"] = df.groupby(level=["index", "series", "tenor"])["date"].transform(
-        lambda s: s.diff().astype("timedelta64[D]") / 360
-    )
-    df["price_return"] += df.day_frac * df.coupon
-    df = df.drop(["day_frac", "coupon", "maturity"], axis=1)
-    return df.set_index(["date"], append=True)
-
-
-def get_singlenames_quotes(indexname, date, tenors):
-    r = serenitas_engine.execute(
-        "SELECT * FROM curve_quotes2(%s, %s, %s)", (indexname, date, list(tenors))
-    )
-    return list(r)
-
-
-def build_curve(r, tenors):
-    if r["date"] is None:
-        raise ValueError(f"Curve for {r['cds_ticker']} is missing")
-    spread_curve = 1e-4 * np.array(r["spread_curve"], dtype="float")
-    upfront_curve = 1e-2 * np.array(r["upfront_curve"], dtype="float")
-    recovery_curve = np.array(r["recovery_curve"], dtype="float")
-    yc = get_curve(r["date"], r["currency"])
-    try:
-        sc = SpreadCurve(
-            r["date"],
-            yc,
-            None,
-            None,
-            None,
-            tenors,
-            spread_curve,
-            upfront_curve,
-            recovery_curve,
-            ticker=r["cds_ticker"],
-            seniority=Seniority[r["seniority"]],
-            doc_clause=DocClause[r["doc_clause"]],
-            defaulted=r["event_date"],
-        )
-    except ValueError as e:
-        print(r[0], e)
-        return r["weight"], None
-    return r["weight"], sc
-
-
-def build_curves(quotes, args):
-    return [build_curve(q, *args) for q in quotes if q is not None]
-
-
-def build_curves_dist(quotes, args, workers=4):
-    # about twice as *slow* as the non-distributed version
-    # not thread safe for some reason, so we need a process pool
-    with Pool(workers) as pool:
-        r = pool.starmap(build_curve, [(q, *args) for q in quotes], 30)
-    return r
-
-
-@lru_cache(maxsize=16)
-def _get_singlenames_curves(index_type, series, trade_date, tenors):
-    sn_quotes = get_singlenames_quotes(
-        f"{index_type.lower()}{series}", trade_date, tenors
-    )
-    args = (np.array(tenors, dtype="float"),)
-    return build_curves_dist(sn_quotes, args)
-
-
-def get_singlenames_curves(
-    index_type, series, trade_date, tenors=(0.5, 1, 2, 3, 4, 5, 7, 10), use_cache=True
-):
-    # tenors need to be a subset of (0.5, 1, 2, 3, 4, 5, 7, 10)
-    if isinstance(trade_date, pd.Timestamp):
-        trade_date = trade_date.date()
-    if use_cache:
-        fun = _get_singlenames_curves
-    else:
-        fun = _get_singlenames_curves.__wrapped__
-    return fun(index_type, series, min(datetime.date.today(), trade_date), tenors)
-
-
-def get_singlenames_curves_prebuilt(conn, index_type, series, trade_date):
-    """Load CDS curves directly from the cds_curves table."""
-    if isinstance(trade_date, datetime.datetime):
-        trade_date = trade_date.date()
-
-    trade_date = adjust_prev_business_day(trade_date)
-    with conn.cursor() as c:
-        c.execute(
-            "SELECT * FROM index_curves(%s, %s)", (f"{index_type}{series}", trade_date)
-        )
-        r = [(w, SpreadCurve.from_bytes(b, True)) for w, b in c]
-    return r
-
-
-def load_all_curves(conn, trade_date):
-    with conn.cursor() as c:
-        c.execute(
-            "SELECT curve, referenceentity, company_id FROM cds_curves "
-            "LEFT JOIN refentity ON redcode=redentitycode WHERE date=%s",
-            (trade_date,),
-        )
-        r = [(name, SpreadCurve.from_bytes(b, True), cid) for (b, name, cid) in c]
-    r = pd.DataFrame.from_records(
-        r,
-        columns=["name", "curve", "company_id"],
-        index=[c.full_ticker for _, c, _ in r],
-    )
-    return r.loc[r.index.drop_duplicates()]
-
-
-def get_tranche_quotes(
-    index_type, series, tenor, date=datetime.date.today(), source="Serenitas"
-):
-    conn = serenitas_pool.getconn()
-    with conn.cursor() as c:
-        if source == "Serenitas":
-            c.callproc("get_tranche_quotes", (index_type, series, tenor, date))
-        else:
-            sql_str = (
-                "SELECT id, attach, detach, upfront_mid AS trancheupfrontmid, "
-                "tranche_spread AS trancherunningmid, "
-                "100*index_price AS indexrefprice, NULL AS indexrefspread "
-                "FROM markit_tranche_quotes "
-                "JOIN index_version USING (basketid) "
-                "WHERE index=%s AND series=%s AND tenor=%s AND quotedate=%s "
-                "ORDER BY attach"
-            )
-            c.execute(sql_str, (index_type, series, tenor, date))
-        col_names = [col.name for col in c.description]
-        df = pd.DataFrame.from_records((tuple(r) for r in c), columns=col_names)
-    serenitas_pool.putconn(conn)
-    return df
-
-
-def get_singlename_curve(
-    ticker: str,
-    seniority: str,
-    doc_clause: str,
-    value_date: datetime.date,
-    yieldcurve: YieldCurve,
-    source: str = "MKIT",
-):
-    conn = serenitas_pool.getconn()
-    with conn.cursor() as c:
-        c.execute(
-            "SELECT * FROM cds_quotes "
-            "JOIN (SELECT UNNEST(cds_curve) AS curve_ticker, "
-            " UNNEST(ARRAY[0.5, 1., 2., 3., 4., 5., 7., 10.]::float[]) AS tenor"
-            " FROM bbg_issuers"
-            " JOIN bbg_markit_mapping USING (company_id, seniority)"
-            " WHERE markit_ticker=%s and seniority=%s) a "
-            "USING (curve_ticker) WHERE date=%s AND source=%s ORDER BY tenor",
-            (ticker, seniority, value_date, source),
-        )
-        df = pd.DataFrame(c, columns=[col.name for col in c.description])
-    serenitas_pool.putconn(conn)
-    spread_curve = 0.5 * (df.runningbid + df.runningask).values * 1e-4
-    upfront_curve = 0.5 * (df.upfrontbid + df.upfrontask).values * 1e-2
-    return SpreadCurve(
-        value_date,
-        yieldcurve,
-        None,
-        None,
-        None,
-        df.tenor.values,
-        spread_curve,
-        upfront_curve,
-        df.recovery.values,
-        ticker=ticker,
-        seniority=Seniority[seniority],
-        doc_clause=DocClause[doc_clause],
-        defaulted=None,
-    )
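
For readers scanning this deletion: the core of the removed `insert_quotes` is the arithmetic that backs out a version i+1 close from the last version i close around a credit event, using the two index factors and the change in cumulative loss from `index_version`. Below is a minimal, self-contained sketch of that calculation; the function name and the input numbers are illustrative only, not part of the removed module.

```python
def version_roll_price(close_v1, factor1, factor2, cumloss1, cumloss2):
    """Back out the version i+1 price from a version i close.

    Mirrors the formula in the deleted insert_quotes(): the deleted code
    treats 1 minus the change in cumulative loss as the recovery on the
    removed notional, strips 100 * recovery out of the factor-weighted
    version i price, and rescales by the version i+1 factor.
    """
    recovery = 1 - (cumloss2 - cumloss1)
    return (factor1 * close_v1 - 100 * recovery) / factor2


# Arbitrary placeholder inputs, chosen only to exercise the formula;
# a recovery of 0.355 matches the hard-coded value in the docstring's SQL.
print(version_roll_price(close_v1=107.25, factor1=1.0, factor2=0.99,
                         cumloss1=0.0, cumloss2=0.645))
```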
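
Similarly, the removed `index_returns` adds running-coupon carry to the raw price change: the close-to-close difference in points divided by 100, plus the index coupon accrued over the gap between quote dates on an Act/360 day count. The sketch below isolates that adjustment for a single price series, assuming the coupon is already a decimal (as the `coupon * 1e-4` in the query produces); the helper name and the sample quotes are hypothetical.

```python
import pandas as pd


def carry_adjusted_price_return(close_price: pd.Series, coupon: float) -> pd.Series:
    """Price return plus coupon carry, as in the deleted index_returns().

    close_price is indexed by quote date; coupon is a decimal (e.g. 0.05
    for a 500bp running coupon). The day fraction is calendar days between
    quotes divided by 360.
    """
    price_return = close_price.diff() / 100
    day_frac = close_price.index.to_series().diff().dt.days / 360
    return price_return + day_frac * coupon


# Hypothetical HY closes over three business days, with a 5% running coupon.
quotes = pd.Series(
    [107.10, 107.25, 107.05],
    index=pd.to_datetime(["2019-01-02", "2019-01-03", "2019-01-04"]),
)
print(carry_adjusted_price_return(quotes, 0.05))
```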
